From a1d84b35880eab8be671b3c0d05b535f46a7e9ae Mon Sep 17 00:00:00 2001 From: KomiMoe Date: Thu, 3 Oct 2024 11:32:29 +0800 Subject: [PATCH] bump to llvmorg-19.1.1 --- .github/workflows/release-binaries.yml | 8 - bolt/test/perf2bolt/lit.local.cfg | 5 +- clang/cmake/caches/Release.cmake | 7 +- clang/include/clang/AST/DeclBase.h | 3 + .../clang/Tooling/CompilationDatabase.h | 6 + clang/lib/AST/DeclBase.cpp | 4 + clang/lib/CodeGen/CGExprScalar.cpp | 26 +- clang/lib/Driver/ToolChains/Clang.cpp | 26 + clang/lib/Format/FormatTokenLexer.cpp | 7 + clang/lib/Format/TokenAnnotator.cpp | 5 - clang/lib/Sema/SemaConcept.cpp | 26 +- clang/lib/Sema/SemaDecl.cpp | 3 +- clang/lib/Tooling/CMakeLists.txt | 1 + .../Tooling/LocateToolCompilationDatabase.cpp | 71 +++ clang/test/ClangScanDeps/implicit-target.c | 31 ++ .../ClangScanDeps/modules-extern-submodule.c | 3 +- .../modules-full-output-tu-order.c | 6 +- .../modules-has-include-umbrella-header.c | 3 +- .../ClangScanDeps/modules-header-sharing.m | 3 +- .../modules-implementation-module-map.c | 3 +- .../modules-implementation-private.m | 3 +- .../ClangScanDeps/modules-priv-fw-from-pub.m | 3 +- .../ClangScanDeps/resolve-executable-path.c | 32 ++ clang/test/CodeGen/X86/x86-atomic-double.c | 88 ++-- .../test/CodeGen/X86/x86-atomic-long_double.c | 293 ++++++++++-- clang/test/Driver/debug-options-as.c | 17 +- clang/test/Modules/pr107673.cppm | 12 + clang/test/Modules/pr108732.cppm | 14 + .../SemaTemplate/concepts-out-of-line-def.cpp | 23 + clang/tools/clang-scan-deps/CMakeLists.txt | 1 + clang/tools/clang-scan-deps/ClangScanDeps.cpp | 14 +- clang/unittests/Format/FormatTest.cpp | 6 + cmake/Modules/LLVMVersion.cmake | 2 +- .../lib/sanitizer_common/sanitizer_linux.cpp | 12 + libcxx/include/__config | 2 +- .../time.zone.members/sys_info.zdump.pass.cpp | 5 +- .../tools/clang_tidy_checks/CMakeLists.txt | 2 + .../vendor/llvm/default_assertion_handler.in | 3 +- lld/ELF/ICF.cpp | 4 +- lld/ELF/InputSection.cpp | 6 +- lld/ELF/InputSection.h | 4 + lld/test/ELF/icf10.s | 3 + .../deque-dbg-info-content/Makefile | 4 +- .../list-dbg-info-content/Makefile | 4 +- .../vector-dbg-info-content/Makefile | 4 +- .../TestDbgInfoContentVectorFromStdModule.py | 1 + llvm/include/llvm/Analysis/AliasAnalysis.h | 2 +- llvm/lib/Analysis/AliasAnalysis.cpp | 2 + llvm/lib/CodeGen/InitUndef.cpp | 1 + llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 83 ++-- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 3 +- llvm/lib/ExecutionEngine/Orc/Core.cpp | 15 + llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 6 +- llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp | 9 +- .../LoongArch/LoongArchISelLowering.cpp | 6 +- .../Target/LoongArch/LoongArchInstrInfo.td | 5 +- .../LoongArch/LoongArchLASXInstrInfo.td | 6 + .../Target/LoongArch/LoongArchOptWInstrs.cpp | 15 + .../lib/Target/X86/AsmParser/X86AsmParser.cpp | 3 +- llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +- llvm/lib/Target/X86/X86InstrAVX512.td | 4 +- llvm/lib/Transforms/Utils/InlineFunction.cpp | 20 +- llvm/lib/Transforms/Utils/LoopPeel.cpp | 2 +- .../CodeGen/AMDGPU/GlobalISel/flat-scratch.ll | 239 ++++++++++ llvm/test/CodeGen/AMDGPU/flat-scratch.ll | 444 ++++++++++++++++++ .../CodeGen/AVR/ldd-immediate-overflow.ll | 144 ++++++ .../CodeGen/AVR/std-immediate-overflow.ll | 137 ++++++ .../ir-instruction/load-store-atomic.ll | 119 +++++ .../ir-instruction/sdiv-udiv-srem-urem.ll | 61 +++ .../CodeGen/LoongArch/lasx/issue107355.ll | 35 ++ llvm/test/CodeGen/PowerPC/ldexp-libcall.ll | 4 +- llvm/test/CodeGen/PowerPC/ldexp.ll | 36 +- .../PowerPC/negative-integer-fp-libcall.ll | 
26 + .../rvv/fixed-vectors-buildvec-of-binop.ll | 13 +- .../RISCV/rvv/fixed-vectors-masked-gather.ll | 96 ++-- .../test/CodeGen/RISCV/rvv/shuffle-reverse.ll | 60 +-- llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll | 24 + .../X86/fold-int-pow2-with-fmul-or-fdiv.ll | 69 ++- llvm/test/MC/Disassembler/X86/apx/kmov.txt | 16 + llvm/test/MC/X86/apx/kmov-att.s | 14 +- llvm/test/MC/X86/apx/kmov-intel.s | 12 + .../Inline/access-attributes-prop.ll | 21 + .../Inline/ret_attr_align_and_noundef.ll | 51 ++ llvm/test/Transforms/LoopUnroll/pr109333.ll | 104 ++++ .../tools/llvm-ml/rip_relative_addressing.asm | 12 +- llvm/utils/gn/secondary/llvm/version.gni | 2 +- llvm/utils/lit/lit/__init__.py | 2 +- llvm/utils/mlgo-utils/mlgo/__init__.py | 2 +- 88 files changed, 2388 insertions(+), 348 deletions(-) create mode 100644 clang/lib/Tooling/LocateToolCompilationDatabase.cpp create mode 100644 clang/test/ClangScanDeps/implicit-target.c create mode 100644 clang/test/ClangScanDeps/resolve-executable-path.c create mode 100644 clang/test/Modules/pr107673.cppm create mode 100644 clang/test/Modules/pr108732.cppm create mode 100644 llvm/test/CodeGen/AVR/ldd-immediate-overflow.ll create mode 100644 llvm/test/CodeGen/AVR/std-immediate-overflow.ll create mode 100644 llvm/test/CodeGen/LoongArch/lasx/issue107355.ll create mode 100644 llvm/test/CodeGen/PowerPC/negative-integer-fp-libcall.ll create mode 100644 llvm/test/Transforms/LoopUnroll/pr109333.ll diff --git a/.github/workflows/release-binaries.yml b/.github/workflows/release-binaries.yml index fcd371d49e6c..925912df6843 100644 --- a/.github/workflows/release-binaries.yml +++ b/.github/workflows/release-binaries.yml @@ -135,16 +135,8 @@ jobs: target_cmake_flags="$target_cmake_flags -DBOOTSTRAP_DARWIN_osx_ARCHS=$arches -DBOOTSTRAP_DARWIN_osx_BUILTIN_ARCHS=$arches" fi - # x86 macOS and x86 Windows have trouble building flang, so disable it. - # Windows: https://github.com/llvm/llvm-project/issues/100202 - # macOS: 'rebase opcodes terminated early at offset 1 of 80016' when building __fortran_builtins.mod build_flang="true" - if [ "$target" = "Windows-X64" ]; then - target_cmake_flags="$target_cmake_flags -DLLVM_RELEASE_ENABLE_PROJECTS=\"clang;lld;lldb;clang-tools-extra;bolt;polly;mlir\"" - build_flang="false" - fi - if [ "${{ runner.os }}" = "Windows" ]; then # The build times out on Windows, so we need to disable LTO. target_cmake_flags="$target_cmake_flags -DLLVM_RELEASE_ENABLE_LTO=OFF" diff --git a/bolt/test/perf2bolt/lit.local.cfg b/bolt/test/perf2bolt/lit.local.cfg index 4ee9ad08cc78..0fecf913aa98 100644 --- a/bolt/test/perf2bolt/lit.local.cfg +++ b/bolt/test/perf2bolt/lit.local.cfg @@ -1,4 +1,5 @@ import shutil +import subprocess -if shutil.which("perf") is not None: - config.available_features.add("perf") \ No newline at end of file +if shutil.which("perf") is not None and subprocess.run(["perf", "record", "-e", "cycles:u", "-o", "/dev/null", "--", "perf", "--version"], capture_output=True).returncode == 0: + config.available_features.add("perf") diff --git a/clang/cmake/caches/Release.cmake b/clang/cmake/caches/Release.cmake index e5161dd9a27b..6d5f75ca0074 100644 --- a/clang/cmake/caches/Release.cmake +++ b/clang/cmake/caches/Release.cmake @@ -47,11 +47,14 @@ set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "") set(CLANG_ENABLE_BOOTSTRAP ON CACHE BOOL "") set(STAGE1_PROJECTS "clang") -set(STAGE1_RUNTIMES "") + +# Building Flang on Windows requires compiler-rt, so we need to build it in +# stage1. compiler-rt is also required for building the Flang tests on +# macOS. 
+set(STAGE1_RUNTIMES "compiler-rt") if (LLVM_RELEASE_ENABLE_PGO) list(APPEND STAGE1_PROJECTS "lld") - list(APPEND STAGE1_RUNTIMES "compiler-rt") set(CLANG_BOOTSTRAP_TARGETS generate-profdata stage2-package diff --git a/clang/include/clang/AST/DeclBase.h b/clang/include/clang/AST/DeclBase.h index 2a4bd0f9c2fd..04dbd1db6cba 100644 --- a/clang/include/clang/AST/DeclBase.h +++ b/clang/include/clang/AST/DeclBase.h @@ -680,6 +680,9 @@ class alignas(8) Decl { /// Whether this declaration comes from explicit global module. bool isFromExplicitGlobalModule() const; + /// Whether this declaration comes from global module. + bool isFromGlobalModule() const; + /// Whether this declaration comes from a named module. bool isInNamedModule() const; diff --git a/clang/include/clang/Tooling/CompilationDatabase.h b/clang/include/clang/Tooling/CompilationDatabase.h index fee584acb486..36fe0812ebe9 100644 --- a/clang/include/clang/Tooling/CompilationDatabase.h +++ b/clang/include/clang/Tooling/CompilationDatabase.h @@ -234,6 +234,12 @@ std::unique_ptr std::unique_ptr inferTargetAndDriverMode(std::unique_ptr Base); +/// Returns a wrapped CompilationDatabase that will transform argv[0] to an +/// absolute path, if it currently is a plain tool name, looking it up in +/// PATH. +std::unique_ptr +inferToolLocation(std::unique_ptr Base); + /// Returns a wrapped CompilationDatabase that will expand all rsp(response) /// files on commandline returned by underlying database. std::unique_ptr diff --git a/clang/lib/AST/DeclBase.cpp b/clang/lib/AST/DeclBase.cpp index b59f118380ca..c4e948a38e26 100644 --- a/clang/lib/AST/DeclBase.cpp +++ b/clang/lib/AST/DeclBase.cpp @@ -1161,6 +1161,10 @@ bool Decl::isFromExplicitGlobalModule() const { return getOwningModule() && getOwningModule()->isExplicitGlobalModule(); } +bool Decl::isFromGlobalModule() const { + return getOwningModule() && getOwningModule()->isGlobalModule(); +} + bool Decl::isInNamedModule() const { return getOwningModule() && getOwningModule()->isNamedModule(); } diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index a17d68424bbc..6e212e74676e 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -2833,18 +2833,22 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, llvm::AtomicOrdering::SequentiallyConsistent); return isPre ? Builder.CreateBinOp(op, old, amt) : old; } - // Special case for atomic increment/decrement on floats + // Special case for atomic increment/decrement on floats. + // Bail out non-power-of-2-sized floating point types (e.g., x86_fp80). if (type->isFloatingType()) { - llvm::AtomicRMWInst::BinOp aop = - isInc ? llvm::AtomicRMWInst::FAdd : llvm::AtomicRMWInst::FSub; - llvm::Instruction::BinaryOps op = - isInc ? llvm::Instruction::FAdd : llvm::Instruction::FSub; - llvm::Value *amt = llvm::ConstantFP::get( - VMContext, llvm::APFloat(static_cast(1.0))); - llvm::Value *old = - Builder.CreateAtomicRMW(aop, LV.getAddress(), amt, - llvm::AtomicOrdering::SequentiallyConsistent); - return isPre ? Builder.CreateBinOp(op, old, amt) : old; + llvm::Type *Ty = ConvertType(type); + if (llvm::has_single_bit(Ty->getScalarSizeInBits())) { + llvm::AtomicRMWInst::BinOp aop = + isInc ? llvm::AtomicRMWInst::FAdd : llvm::AtomicRMWInst::FSub; + llvm::Instruction::BinaryOps op = + isInc ? 
llvm::Instruction::FAdd : llvm::Instruction::FSub; + llvm::Value *amt = llvm::ConstantFP::get(Ty, 1.0); + llvm::AtomicRMWInst *old = Builder.CreateAtomicRMW( + aop, LV.getAddress(), amt, + llvm::AtomicOrdering::SequentiallyConsistent); + + return isPre ? Builder.CreateBinOp(op, old, amt) : old; + } } value = EmitLoadOfLValue(LV, E->getExprLoc()); input = value; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 366b147a052b..8858c318aba7 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -8561,6 +8561,32 @@ void ClangAs::ConstructJob(Compilation &C, const JobAction &JA, WantDebug = !A->getOption().matches(options::OPT_g0) && !A->getOption().matches(options::OPT_ggdb0); + // If a -gdwarf argument appeared, remember it. + bool EmitDwarf = false; + if (const Arg *A = getDwarfNArg(Args)) + EmitDwarf = checkDebugInfoOption(A, Args, D, getToolChain()); + + bool EmitCodeView = false; + if (const Arg *A = Args.getLastArg(options::OPT_gcodeview)) + EmitCodeView = checkDebugInfoOption(A, Args, D, getToolChain()); + + // If the user asked for debug info but did not explicitly specify -gcodeview + // or -gdwarf, ask the toolchain for the default format. + if (!EmitCodeView && !EmitDwarf && WantDebug) { + switch (getToolChain().getDefaultDebugFormat()) { + case llvm::codegenoptions::DIF_CodeView: + EmitCodeView = true; + break; + case llvm::codegenoptions::DIF_DWARF: + EmitDwarf = true; + break; + } + } + + // If the arguments don't imply DWARF, don't emit any debug info here. + if (!EmitDwarf) + WantDebug = false; + llvm::codegenoptions::DebugInfoKind DebugInfoKind = llvm::codegenoptions::NoDebugInfo; diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp index e21b5a882b77..63949b2e26bd 100644 --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -100,6 +100,13 @@ ArrayRef FormatTokenLexer::lex() { if (Tokens.back()->NewlinesBefore > 0 || Tokens.back()->IsMultiline) FirstInLineIndex = Tokens.size() - 1; } while (Tokens.back()->isNot(tok::eof)); + if (Style.InsertNewlineAtEOF) { + auto &TokEOF = *Tokens.back(); + if (TokEOF.NewlinesBefore == 0) { + TokEOF.NewlinesBefore = 1; + TokEOF.OriginalColumn = 0; + } + } return Tokens; } diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 3f00a28e6298..4512e539cc79 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -3680,11 +3680,6 @@ void TokenAnnotator::annotate(AnnotatedLine &Line) { auto *First = Line.First; First->SpacesRequiredBefore = 1; First->CanBreakBefore = First->MustBreakBefore; - - if (First->is(tok::eof) && First->NewlinesBefore == 0 && - Style.InsertNewlineAtEOF) { - First->NewlinesBefore = 1; - } } // This function heuristically determines whether 'Current' starts the name of a diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp index c34d32002b5a..244f6ef2f53f 100644 --- a/clang/lib/Sema/SemaConcept.cpp +++ b/clang/lib/Sema/SemaConcept.cpp @@ -969,8 +969,30 @@ static const Expr *SubstituteConstraintExpressionWithoutSatisfaction( // equivalence. 
  LocalInstantiationScope ScopeForParameters(S);
   if (auto *FD = DeclInfo.getDecl()->getAsFunction())
-    for (auto *PVD : FD->parameters())
-      ScopeForParameters.InstantiatedLocal(PVD, PVD);
+    for (auto *PVD : FD->parameters()) {
+      if (!PVD->isParameterPack()) {
+        ScopeForParameters.InstantiatedLocal(PVD, PVD);
+        continue;
+      }
+      // This is hacky: we're mapping the parameter pack to a size-of-1 argument
+      // to avoid building SubstTemplateTypeParmPackTypes for
+      // PackExpansionTypes. The SubstTemplateTypeParmPackType node would
+      // otherwise reference the AssociatedDecl of the template arguments, which
+      // is, in this case, the template declaration.
+      //
+      // However, as we are in the process of comparing potential
+      // re-declarations, the canonical declaration is the declaration itself at
+      // this point. So if we didn't expand these packs, we would end up with an
+      // incorrect profile difference because we will be profiling the
+      // canonical types!
+      //
+      // FIXME: Improve the "no-transform" machinery in FindInstantiatedDecl so
+      // that we can eliminate the Scope in the cases where the declarations are
+      // not necessarily instantiated. It would also benefit the noexcept
+      // specifier comparison.
+      ScopeForParameters.MakeInstantiatedLocalArgPack(PVD);
+      ScopeForParameters.InstantiatedLocalPackArg(PVD, PVD);
+    }
 
   std::optional<Sema::CXXThisScopeRAII> ThisScope;
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index d608dd92a4b4..717ddb833958 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -9732,8 +9732,9 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
   // the function decl is created above).
   // FIXME: We need a better way to separate C++ standard and clang modules.
   bool ImplicitInlineCXX20 = !getLangOpts().CPlusPlusModules ||
+                             NewFD->isConstexpr() || NewFD->isConsteval() ||
                              !NewFD->getOwningModule() ||
-                             NewFD->isFromExplicitGlobalModule() ||
+                             NewFD->isFromGlobalModule() ||
                              NewFD->getOwningModule()->isHeaderLikeModule();
   bool isInline = D.getDeclSpec().isInlineSpecified();
   bool isVirtual = D.getDeclSpec().isVirtualSpecified();
diff --git a/clang/lib/Tooling/CMakeLists.txt b/clang/lib/Tooling/CMakeLists.txt
index 93a9e707a134..fc1f1f9f9d36 100644
--- a/clang/lib/Tooling/CMakeLists.txt
+++ b/clang/lib/Tooling/CMakeLists.txt
@@ -25,6 +25,7 @@ add_clang_library(clangTooling
   GuessTargetAndModeCompilationDatabase.cpp
   InterpolatingCompilationDatabase.cpp
   JSONCompilationDatabase.cpp
+  LocateToolCompilationDatabase.cpp
   Refactoring.cpp
   RefactoringCallbacks.cpp
   StandaloneExecution.cpp
diff --git a/clang/lib/Tooling/LocateToolCompilationDatabase.cpp b/clang/lib/Tooling/LocateToolCompilationDatabase.cpp
new file mode 100644
index 000000000000..033f69f3760c
--- /dev/null
+++ b/clang/lib/Tooling/LocateToolCompilationDatabase.cpp
@@ -0,0 +1,71 @@
+//===- LocateToolCompilationDatabase.cpp ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/CompilationDatabase.h"
+#include "clang/Tooling/Tooling.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/Program.h"
+#include <memory>
+
+namespace clang {
+namespace tooling {
+
+namespace {
+class LocationAdderDatabase : public CompilationDatabase {
+public:
+  LocationAdderDatabase(std::unique_ptr<CompilationDatabase> Base)
+      : Base(std::move(Base)) {
+    assert(this->Base != nullptr);
+  }
+
+  std::vector<std::string> getAllFiles() const override {
+    return Base->getAllFiles();
+  }
+
+  std::vector<CompileCommand> getAllCompileCommands() const override {
+    return addLocation(Base->getAllCompileCommands());
+  }
+
+  std::vector<CompileCommand>
+  getCompileCommands(StringRef FilePath) const override {
+    return addLocation(Base->getCompileCommands(FilePath));
+  }
+
+private:
+  std::vector<CompileCommand>
+  addLocation(std::vector<CompileCommand> Cmds) const {
+    for (auto &Cmd : Cmds) {
+      if (Cmd.CommandLine.empty())
+        continue;
+      std::string &Driver = Cmd.CommandLine.front();
+      // If the driver name already is absolute, we don't need to do anything.
+      if (llvm::sys::path::is_absolute(Driver))
+        continue;
+      // If the name is a relative path, like bin/clang, we assume it's
+      // possible to resolve it and don't do anything about it either.
+      if (llvm::any_of(Driver,
+                       [](char C) { return llvm::sys::path::is_separator(C); }))
+        continue;
+      auto Absolute = llvm::sys::findProgramByName(Driver);
+      // If we found it in path, update the entry in Cmd.CommandLine
+      if (Absolute && llvm::sys::path::is_absolute(*Absolute))
+        Driver = std::move(*Absolute);
+    }
+    return Cmds;
+  }
+  std::unique_ptr<CompilationDatabase> Base;
+};
+} // namespace
+
+std::unique_ptr<CompilationDatabase>
+inferToolLocation(std::unique_ptr<CompilationDatabase> Base) {
+  return std::make_unique<LocationAdderDatabase>(std::move(Base));
+}
+
+} // namespace tooling
+} // namespace clang
diff --git a/clang/test/ClangScanDeps/implicit-target.c b/clang/test/ClangScanDeps/implicit-target.c
new file mode 100644
index 000000000000..cf757f937331
--- /dev/null
+++ b/clang/test/ClangScanDeps/implicit-target.c
@@ -0,0 +1,31 @@
+// Check that we can detect an implicit target when clang is invoked as
+// <triple>-clang. Using an implicit triple requires that the target actually
+// is available, too.
+// REQUIRES: x86-registered-target
+
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+// RUN: sed -e "s|DIR|%/t|g" %t/cdb.json.in > %t/cdb.json
+
+// Check that we can deduce this both when using a compilation database, and when using
+// a literal command line.
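[Editor's sketch] The PATH lookup that LocationAdderDatabase performs above can be modeled outside of LLVM in a few lines. This is a minimal sketch, assuming a POSIX ':'-separated PATH; resolveTool is a hypothetical name, not part of the patch:

#include <cstdlib>
#include <filesystem>
#include <optional>
#include <sstream>
#include <string>

namespace fs = std::filesystem;

// Resolve a bare tool name against PATH. Mirrors the wrapper's rule:
// absolute paths and names containing a separator are left alone.
std::optional<std::string> resolveTool(const std::string &Driver) {
  if (fs::path(Driver).is_absolute() ||
      Driver.find('/') != std::string::npos)
    return std::nullopt; // already absolute, or a resolvable relative path
  const char *Path = std::getenv("PATH");
  if (!Path)
    return std::nullopt;
  std::stringstream Dirs(Path);
  for (std::string Dir; std::getline(Dirs, Dir, ':');) {
    std::error_code EC;
    fs::path Candidate = fs::path(Dir) / Driver;
    if (fs::exists(Candidate, EC)) // found it; report the absolute path
      return fs::absolute(Candidate).string();
  }
  return std::nullopt;
}

As in the wrapper, a name that cannot be resolved is left untouched rather than treated as an error.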
+ +// RUN: clang-scan-deps -format experimental-full -compilation-database %t/cdb.json | FileCheck %s + +// RUN: clang-scan-deps -format experimental-full -- x86_64-w64-mingw32-clang %t/source.c -o %t/source.o | FileCheck %s + +// CHECK: "-triple", +// CHECK-NEXT: "x86_64-w64-windows-gnu", + + +//--- cdb.json.in +[ + { + "directory": "DIR" + "command": "x86_64-w64-mingw32-clang -c DIR/source.c -o DIR/source.o" + "file": "DIR/source.c" + }, +] + +//--- source.c +void func(void) {} diff --git a/clang/test/ClangScanDeps/modules-extern-submodule.c b/clang/test/ClangScanDeps/modules-extern-submodule.c index 92f2c749dd2c..6e9082b0165f 100644 --- a/clang/test/ClangScanDeps/modules-extern-submodule.c +++ b/clang/test/ClangScanDeps/modules-extern-submodule.c @@ -112,8 +112,7 @@ module third {} // CHECK: "-fmodule-map-file=[[PREFIX]]/first/first/module.modulemap", // CHECK: "-fmodule-file=first=[[PREFIX]]/cache/{{.*}}/first-{{.*}}.pcm", // CHECK: ], -// CHECK-NEXT: "executable": "clang", -// CHECK-NEXT: "file-deps": [ +// CHECK: "file-deps": [ // CHECK-NEXT: "[[PREFIX]]/tu.m" // CHECK-NEXT: ], // CHECK-NEXT: "input-file": "[[PREFIX]]/tu.c" diff --git a/clang/test/ClangScanDeps/modules-full-output-tu-order.c b/clang/test/ClangScanDeps/modules-full-output-tu-order.c index 04939826817f..065b8ac8ae07 100644 --- a/clang/test/ClangScanDeps/modules-full-output-tu-order.c +++ b/clang/test/ClangScanDeps/modules-full-output-tu-order.c @@ -35,8 +35,7 @@ // CHECK: "-D" // CHECK-NEXT: "ONE" // CHECK: ], -// CHECK-NEXT: "executable": "clang", -// CHECK-NEXT: "file-deps": [ +// CHECK: "file-deps": [ // CHECK-NEXT: "[[PREFIX]]/tu.c" // CHECK-NEXT: ], // CHECK-NEXT: "input-file": "[[PREFIX]]/tu.c" @@ -52,8 +51,7 @@ // CHECK: "-D" // CHECK-NEXT: "TWO" // CHECK: ], -// CHECK-NEXT: "executable": "clang", -// CHECK-NEXT: "file-deps": [ +// CHECK: "file-deps": [ // CHECK-NEXT: "[[PREFIX]]/tu.c" // CHECK-NEXT: ], // CHECK-NEXT: "input-file": "[[PREFIX]]/tu.c" diff --git a/clang/test/ClangScanDeps/modules-has-include-umbrella-header.c b/clang/test/ClangScanDeps/modules-has-include-umbrella-header.c index e9363b2e14b0..022c59ca65db 100644 --- a/clang/test/ClangScanDeps/modules-has-include-umbrella-header.c +++ b/clang/test/ClangScanDeps/modules-has-include-umbrella-header.c @@ -64,8 +64,7 @@ module Dependency { header "dependency.h" } // CHECK: ], // CHECK-NEXT: "command-line": [ // CHECK: ], -// CHECK-NEXT: "executable": "clang", -// CHECK-NEXT: "file-deps": [ +// CHECK: "file-deps": [ // CHECK-NEXT: "[[PREFIX]]/tu.c" // CHECK-NEXT: ], // CHECK-NEXT: "input-file": "[[PREFIX]]/tu.c" diff --git a/clang/test/ClangScanDeps/modules-header-sharing.m b/clang/test/ClangScanDeps/modules-header-sharing.m index ec94923ae8ee..31ef351ec38b 100644 --- a/clang/test/ClangScanDeps/modules-header-sharing.m +++ b/clang/test/ClangScanDeps/modules-header-sharing.m @@ -77,8 +77,7 @@ // CHECK: "-fmodule-map-file=[[PREFIX]]/frameworks/A.framework/Modules/module.modulemap", // CHECK: "-fmodule-name=A", // CHECK: ], -// CHECK-NEXT: "executable": "clang", -// CHECK-NEXT: "file-deps": [ +// CHECK: "file-deps": [ // CHECK-NEXT: "[[PREFIX]]/tu.m", // CHECK-NEXT: "[[PREFIX]]/shared/H.h" // CHECK-NEXT: ], diff --git a/clang/test/ClangScanDeps/modules-implementation-module-map.c b/clang/test/ClangScanDeps/modules-implementation-module-map.c index d76d31570046..b7637d0c9143 100644 --- a/clang/test/ClangScanDeps/modules-implementation-module-map.c +++ b/clang/test/ClangScanDeps/modules-implementation-module-map.c @@ -27,8 +27,7 @@ framework module FWPrivate 
{ header "private.h" } // CHECK: "-fmodule-map-file=[[PREFIX]]/frameworks/FW.framework/Modules/module.private.modulemap", // CHECK: "-fmodule-name=FWPrivate", // CHECK: ], -// CHECK-NEXT: "executable": "clang", -// CHECK-NEXT: "file-deps": [ +// CHECK: "file-deps": [ // CHECK-NEXT: "[[PREFIX]]/tu.m" // CHECK-NEXT: ], // CHECK-NEXT: "input-file": "[[PREFIX]]/tu.m" diff --git a/clang/test/ClangScanDeps/modules-implementation-private.m b/clang/test/ClangScanDeps/modules-implementation-private.m index acc01017d664..b376073f4b9e 100644 --- a/clang/test/ClangScanDeps/modules-implementation-private.m +++ b/clang/test/ClangScanDeps/modules-implementation-private.m @@ -62,8 +62,7 @@ // CHECK-NEXT: ], // CHECK-NEXT: "command-line": [ // CHECK: ], -// CHECK-NEXT: "executable": "clang", -// CHECK-NEXT: "file-deps": [ +// CHECK: "file-deps": [ // CHECK-NEXT: "[[PREFIX]]/tu.m", // CHECK-NEXT: "[[PREFIX]]/frameworks/FW.framework/PrivateHeaders/Missed.h", // CHECK-NEXT: "[[PREFIX]]/frameworks/FW.framework/Headers/FW.h" diff --git a/clang/test/ClangScanDeps/modules-priv-fw-from-pub.m b/clang/test/ClangScanDeps/modules-priv-fw-from-pub.m index 4847fedac3bf..e96130884636 100644 --- a/clang/test/ClangScanDeps/modules-priv-fw-from-pub.m +++ b/clang/test/ClangScanDeps/modules-priv-fw-from-pub.m @@ -110,8 +110,7 @@ // CHECK-NEXT: ], // CHECK-NEXT: "command-line": [ // CHECK: ], -// CHECK-NEXT: "executable": "clang", -// CHECK-NEXT: "file-deps": [ +// CHECK: "file-deps": [ // CHECK-NEXT: "[[PREFIX]]/tu.m" // CHECK-NEXT: ], // CHECK-NEXT: "input-file": "[[PREFIX]]/tu.m" diff --git a/clang/test/ClangScanDeps/resolve-executable-path.c b/clang/test/ClangScanDeps/resolve-executable-path.c new file mode 100644 index 000000000000..63e34ca256a8 --- /dev/null +++ b/clang/test/ClangScanDeps/resolve-executable-path.c @@ -0,0 +1,32 @@ +// UNSUPPORTED: system-windows + +// Check that we expand the executable name to an absolute path, when invoked +// with a plain executable name, which is implied to be found in PATH. +// REQUIRES: x86-registered-target + +// RUN: rm -rf %t +// RUN: mkdir -p %t/bin +// RUN: ln -s %clang %t/bin/x86_64-w64-mingw32-clang +// RUN: split-file %s %t +// RUN: sed -e "s|DIR|%/t|g" %t/cdb.json.in > %t/cdb.json + +// Check that we can deduce this both when using a compilation database, and when using +// a literal command line. 
+ +// RUN: env "PATH=%t/bin:%PATH%" clang-scan-deps -format experimental-full -compilation-database %t/cdb.json | FileCheck %s -DBASE=%/t + +// RUN: env "PATH=%t/bin:%PATH%" clang-scan-deps -format experimental-full -- x86_64-w64-mingw32-clang %t/source.c -o %t/source.o | FileCheck %s -DBASE=%/t + +// CHECK: "executable": "[[BASE]]/bin/x86_64-w64-mingw32-clang" + +//--- cdb.json.in +[ + { + "directory": "DIR" + "command": "x86_64-w64-mingw32-clang -c DIR/source.c -o DIR/source.o" + "file": "DIR/source.c" + }, +] + +//--- source.c +void func(void) {} diff --git a/clang/test/CodeGen/X86/x86-atomic-double.c b/clang/test/CodeGen/X86/x86-atomic-double.c index 2354c89cc2b1..09c8f70c3db8 100644 --- a/clang/test/CodeGen/X86/x86-atomic-double.c +++ b/clang/test/CodeGen/X86/x86-atomic-double.c @@ -6,20 +6,14 @@ // X64-LABEL: define dso_local double @test_double_post_inc( // X64-SAME: ) #[[ATTR0:[0-9]+]] { // X64-NEXT: entry: -// X64-NEXT: [[RETVAL:%.*]] = alloca double, align 8 -// X64-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr @test_double_post_inc.n, float 1.000000e+00 seq_cst, align 8 -// X64-NEXT: store float [[TMP0]], ptr [[RETVAL]], align 8 -// X64-NEXT: [[TMP1:%.*]] = load double, ptr [[RETVAL]], align 8 -// X64-NEXT: ret double [[TMP1]] +// X64-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr @test_double_post_inc.n, double 1.000000e+00 seq_cst, align 8 +// X64-NEXT: ret double [[TMP0]] // // X86-LABEL: define dso_local double @test_double_post_inc( // X86-SAME: ) #[[ATTR0:[0-9]+]] { // X86-NEXT: entry: -// X86-NEXT: [[RETVAL:%.*]] = alloca double, align 4 -// X86-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr @test_double_post_inc.n, float 1.000000e+00 seq_cst, align 8 -// X86-NEXT: store float [[TMP0]], ptr [[RETVAL]], align 4 -// X86-NEXT: [[TMP1:%.*]] = load double, ptr [[RETVAL]], align 4 -// X86-NEXT: ret double [[TMP1]] +// X86-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr @test_double_post_inc.n, double 1.000000e+00 seq_cst, align 8 +// X86-NEXT: ret double [[TMP0]] // double test_double_post_inc() { @@ -30,20 +24,14 @@ double test_double_post_inc() // X64-LABEL: define dso_local double @test_double_post_dc( // X64-SAME: ) #[[ATTR0]] { // X64-NEXT: entry: -// X64-NEXT: [[RETVAL:%.*]] = alloca double, align 8 -// X64-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr @test_double_post_dc.n, float 1.000000e+00 seq_cst, align 8 -// X64-NEXT: store float [[TMP0]], ptr [[RETVAL]], align 8 -// X64-NEXT: [[TMP1:%.*]] = load double, ptr [[RETVAL]], align 8 -// X64-NEXT: ret double [[TMP1]] +// X64-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr @test_double_post_dc.n, double 1.000000e+00 seq_cst, align 8 +// X64-NEXT: ret double [[TMP0]] // // X86-LABEL: define dso_local double @test_double_post_dc( // X86-SAME: ) #[[ATTR0]] { // X86-NEXT: entry: -// X86-NEXT: [[RETVAL:%.*]] = alloca double, align 4 -// X86-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr @test_double_post_dc.n, float 1.000000e+00 seq_cst, align 8 -// X86-NEXT: store float [[TMP0]], ptr [[RETVAL]], align 4 -// X86-NEXT: [[TMP1:%.*]] = load double, ptr [[RETVAL]], align 4 -// X86-NEXT: ret double [[TMP1]] +// X86-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr @test_double_post_dc.n, double 1.000000e+00 seq_cst, align 8 +// X86-NEXT: ret double [[TMP0]] // double test_double_post_dc() { @@ -54,22 +42,16 @@ double test_double_post_dc() // X64-LABEL: define dso_local double @test_double_pre_dc( // X64-SAME: ) #[[ATTR0]] { // X64-NEXT: entry: -// X64-NEXT: [[RETVAL:%.*]] = alloca double, align 8 -// X64-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr @test_double_pre_dc.n, float 1.000000e+00 
seq_cst, align 8 -// X64-NEXT: [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00 -// X64-NEXT: store float [[TMP1]], ptr [[RETVAL]], align 8 -// X64-NEXT: [[TMP2:%.*]] = load double, ptr [[RETVAL]], align 8 -// X64-NEXT: ret double [[TMP2]] +// X64-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr @test_double_pre_dc.n, double 1.000000e+00 seq_cst, align 8 +// X64-NEXT: [[TMP1:%.*]] = fsub double [[TMP0]], 1.000000e+00 +// X64-NEXT: ret double [[TMP1]] // // X86-LABEL: define dso_local double @test_double_pre_dc( // X86-SAME: ) #[[ATTR0]] { // X86-NEXT: entry: -// X86-NEXT: [[RETVAL:%.*]] = alloca double, align 4 -// X86-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr @test_double_pre_dc.n, float 1.000000e+00 seq_cst, align 8 -// X86-NEXT: [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00 -// X86-NEXT: store float [[TMP1]], ptr [[RETVAL]], align 4 -// X86-NEXT: [[TMP2:%.*]] = load double, ptr [[RETVAL]], align 4 -// X86-NEXT: ret double [[TMP2]] +// X86-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr @test_double_pre_dc.n, double 1.000000e+00 seq_cst, align 8 +// X86-NEXT: [[TMP1:%.*]] = fsub double [[TMP0]], 1.000000e+00 +// X86-NEXT: ret double [[TMP1]] // double test_double_pre_dc() { @@ -80,25 +62,43 @@ double test_double_pre_dc() // X64-LABEL: define dso_local double @test_double_pre_inc( // X64-SAME: ) #[[ATTR0]] { // X64-NEXT: entry: -// X64-NEXT: [[RETVAL:%.*]] = alloca double, align 8 -// X64-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr @test_double_pre_inc.n, float 1.000000e+00 seq_cst, align 8 -// X64-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00 -// X64-NEXT: store float [[TMP1]], ptr [[RETVAL]], align 8 -// X64-NEXT: [[TMP2:%.*]] = load double, ptr [[RETVAL]], align 8 -// X64-NEXT: ret double [[TMP2]] +// X64-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr @test_double_pre_inc.n, double 1.000000e+00 seq_cst, align 8 +// X64-NEXT: [[TMP1:%.*]] = fadd double [[TMP0]], 1.000000e+00 +// X64-NEXT: ret double [[TMP1]] // // X86-LABEL: define dso_local double @test_double_pre_inc( // X86-SAME: ) #[[ATTR0]] { // X86-NEXT: entry: -// X86-NEXT: [[RETVAL:%.*]] = alloca double, align 4 -// X86-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr @test_double_pre_inc.n, float 1.000000e+00 seq_cst, align 8 -// X86-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00 -// X86-NEXT: store float [[TMP1]], ptr [[RETVAL]], align 4 -// X86-NEXT: [[TMP2:%.*]] = load double, ptr [[RETVAL]], align 4 -// X86-NEXT: ret double [[TMP2]] +// X86-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr @test_double_pre_inc.n, double 1.000000e+00 seq_cst, align 8 +// X86-NEXT: [[TMP1:%.*]] = fadd double [[TMP0]], 1.000000e+00 +// X86-NEXT: ret double [[TMP1]] // double test_double_pre_inc() { static _Atomic double n; return ++n; } + +// X64-LABEL: define dso_local i32 @pr107054( +// X64-SAME: ) #[[ATTR0]] { +// X64-NEXT: entry: +// X64-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr @pr107054.n, double 1.000000e+00 seq_cst, align 8 +// X64-NEXT: [[TMP1:%.*]] = fadd double [[TMP0]], 1.000000e+00 +// X64-NEXT: [[CMP:%.*]] = fcmp oeq double [[TMP1]], 1.000000e+00 +// X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +// X64-NEXT: ret i32 [[CONV]] +// +// X86-LABEL: define dso_local i32 @pr107054( +// X86-SAME: ) #[[ATTR0]] { +// X86-NEXT: entry: +// X86-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr @pr107054.n, double 1.000000e+00 seq_cst, align 8 +// X86-NEXT: [[TMP1:%.*]] = fadd double [[TMP0]], 1.000000e+00 +// X86-NEXT: [[CMP:%.*]] = fcmp oeq double [[TMP1]], 1.000000e+00 +// X86-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +// X86-NEXT: ret i32 [[CONV]] +// +int pr107054() +{ + 
static _Atomic double n; + return (++n) == 1; +} diff --git a/clang/test/CodeGen/X86/x86-atomic-long_double.c b/clang/test/CodeGen/X86/x86-atomic-long_double.c index 2c3f381f1351..9c82784807da 100644 --- a/clang/test/CodeGen/X86/x86-atomic-long_double.c +++ b/clang/test/CodeGen/X86/x86-atomic-long_double.c @@ -4,29 +4,60 @@ // X64-LABEL: define dso_local x86_fp80 @testinc( // X64-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0:[0-9]+]] { -// X64-NEXT: [[ENTRY:.*:]] -// X64-NEXT: [[RETVAL:%.*]] = alloca x86_fp80, align 16 +// X64-NEXT: [[ENTRY:.*]]: // X64-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 8 +// X64-NEXT: [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 16 +// X64-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 16 +// X64-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca x86_fp80, align 16 +// X64-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca x86_fp80, align 16 // X64-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8 // X64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8 -// X64-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr [[TMP0]], float 1.000000e+00 seq_cst, align 16 -// X64-NEXT: [[TMP2:%.*]] = fadd float [[TMP1]], 1.000000e+00 -// X64-NEXT: store float [[TMP2]], ptr [[RETVAL]], align 16 -// X64-NEXT: [[TMP3:%.*]] = load x86_fp80, ptr [[RETVAL]], align 16 -// X64-NEXT: ret x86_fp80 [[TMP3]] +// X64-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i128, ptr [[TMP0]] seq_cst, align 16 +// X64-NEXT: store i128 [[ATOMIC_LOAD]], ptr [[ATOMIC_TEMP]], align 16 +// X64-NEXT: [[TMP1:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP]], align 16 +// X64-NEXT: br label %[[ATOMIC_OP:.*]] +// X64: [[ATOMIC_OP]]: +// X64-NEXT: [[TMP2:%.*]] = phi x86_fp80 [ [[TMP1]], %[[ENTRY]] ], [ [[TMP8:%.*]], %[[ATOMIC_OP]] ] +// X64-NEXT: [[INC:%.*]] = fadd x86_fp80 [[TMP2]], 0xK3FFF8000000000000000 +// X64-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ATOMIC_TEMP1]], i8 0, i64 16, i1 false) +// X64-NEXT: store x86_fp80 [[TMP2]], ptr [[ATOMIC_TEMP1]], align 16 +// X64-NEXT: [[TMP3:%.*]] = load i128, ptr [[ATOMIC_TEMP1]], align 16 +// X64-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ATOMIC_TEMP2]], i8 0, i64 16, i1 false) +// X64-NEXT: store x86_fp80 [[INC]], ptr [[ATOMIC_TEMP2]], align 16 +// X64-NEXT: [[TMP4:%.*]] = load i128, ptr [[ATOMIC_TEMP2]], align 16 +// X64-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[TMP0]], i128 [[TMP3]], i128 [[TMP4]] seq_cst seq_cst, align 16 +// X64-NEXT: [[TMP6:%.*]] = extractvalue { i128, i1 } [[TMP5]], 0 +// X64-NEXT: [[TMP7:%.*]] = extractvalue { i128, i1 } [[TMP5]], 1 +// X64-NEXT: store i128 [[TMP6]], ptr [[ATOMIC_TEMP3]], align 16 +// X64-NEXT: [[TMP8]] = load x86_fp80, ptr [[ATOMIC_TEMP3]], align 16 +// X64-NEXT: br i1 [[TMP7]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]] +// X64: [[ATOMIC_CONT]]: +// X64-NEXT: ret x86_fp80 [[INC]] // // X86-LABEL: define dso_local x86_fp80 @testinc( // X86-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0:[0-9]+]] { -// X86-NEXT: [[ENTRY:.*:]] -// X86-NEXT: [[RETVAL:%.*]] = alloca x86_fp80, align 4 +// X86-NEXT: [[ENTRY:.*]]: // X86-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 4 +// X86-NEXT: [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 4 +// X86-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 4 +// X86-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca x86_fp80, align 4 // X86-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4 // X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4 -// X86-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr [[TMP0]], float 1.000000e+00 seq_cst, align 4 -// X86-NEXT: [[TMP2:%.*]] = fadd float [[TMP1]], 1.000000e+00 -// X86-NEXT: store float [[TMP2]], ptr 
[[RETVAL]], align 4 -// X86-NEXT: [[TMP3:%.*]] = load x86_fp80, ptr [[RETVAL]], align 4 -// X86-NEXT: ret x86_fp80 [[TMP3]] +// X86-NEXT: call void @__atomic_load(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 5) +// X86-NEXT: [[TMP1:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP]], align 4 +// X86-NEXT: br label %[[ATOMIC_OP:.*]] +// X86: [[ATOMIC_OP]]: +// X86-NEXT: [[TMP2:%.*]] = phi x86_fp80 [ [[TMP1]], %[[ENTRY]] ], [ [[TMP3:%.*]], %[[ATOMIC_OP]] ] +// X86-NEXT: [[INC:%.*]] = fadd x86_fp80 [[TMP2]], 0xK3FFF8000000000000000 +// X86-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP1]], i8 0, i64 12, i1 false) +// X86-NEXT: store x86_fp80 [[TMP2]], ptr [[ATOMIC_TEMP1]], align 4 +// X86-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP2]], i8 0, i64 12, i1 false) +// X86-NEXT: store x86_fp80 [[INC]], ptr [[ATOMIC_TEMP2]], align 4 +// X86-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP1]], ptr noundef [[ATOMIC_TEMP2]], i32 noundef 5, i32 noundef 5) +// X86-NEXT: [[TMP3]] = load x86_fp80, ptr [[ATOMIC_TEMP1]], align 4 +// X86-NEXT: br i1 [[CALL]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]] +// X86: [[ATOMIC_CONT]]: +// X86-NEXT: ret x86_fp80 [[INC]] // long double testinc(_Atomic long double *addr) { @@ -35,27 +66,60 @@ long double testinc(_Atomic long double *addr) { // X64-LABEL: define dso_local x86_fp80 @testdec( // X64-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] { -// X64-NEXT: [[ENTRY:.*:]] -// X64-NEXT: [[RETVAL:%.*]] = alloca x86_fp80, align 16 +// X64-NEXT: [[ENTRY:.*]]: // X64-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 8 +// X64-NEXT: [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 16 +// X64-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 16 +// X64-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca x86_fp80, align 16 +// X64-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca x86_fp80, align 16 // X64-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8 // X64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8 -// X64-NEXT: [[TMP1:%.*]] = atomicrmw fsub ptr [[TMP0]], float 1.000000e+00 seq_cst, align 16 -// X64-NEXT: store float [[TMP1]], ptr [[RETVAL]], align 16 -// X64-NEXT: [[TMP2:%.*]] = load x86_fp80, ptr [[RETVAL]], align 16 -// X64-NEXT: ret x86_fp80 [[TMP2]] +// X64-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i128, ptr [[TMP0]] seq_cst, align 16 +// X64-NEXT: store i128 [[ATOMIC_LOAD]], ptr [[ATOMIC_TEMP]], align 16 +// X64-NEXT: [[TMP1:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP]], align 16 +// X64-NEXT: br label %[[ATOMIC_OP:.*]] +// X64: [[ATOMIC_OP]]: +// X64-NEXT: [[TMP2:%.*]] = phi x86_fp80 [ [[TMP1]], %[[ENTRY]] ], [ [[TMP8:%.*]], %[[ATOMIC_OP]] ] +// X64-NEXT: [[DEC:%.*]] = fadd x86_fp80 [[TMP2]], 0xKBFFF8000000000000000 +// X64-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ATOMIC_TEMP1]], i8 0, i64 16, i1 false) +// X64-NEXT: store x86_fp80 [[TMP2]], ptr [[ATOMIC_TEMP1]], align 16 +// X64-NEXT: [[TMP3:%.*]] = load i128, ptr [[ATOMIC_TEMP1]], align 16 +// X64-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ATOMIC_TEMP2]], i8 0, i64 16, i1 false) +// X64-NEXT: store x86_fp80 [[DEC]], ptr [[ATOMIC_TEMP2]], align 16 +// X64-NEXT: [[TMP4:%.*]] = load i128, ptr [[ATOMIC_TEMP2]], align 16 +// X64-NEXT: [[TMP5:%.*]] = cmpxchg ptr [[TMP0]], i128 [[TMP3]], i128 [[TMP4]] seq_cst seq_cst, align 16 +// X64-NEXT: [[TMP6:%.*]] = extractvalue { i128, i1 } [[TMP5]], 0 +// X64-NEXT: [[TMP7:%.*]] = extractvalue { i128, i1 } [[TMP5]], 1 +// X64-NEXT: store i128 [[TMP6]], 
ptr [[ATOMIC_TEMP3]], align 16 +// X64-NEXT: [[TMP8]] = load x86_fp80, ptr [[ATOMIC_TEMP3]], align 16 +// X64-NEXT: br i1 [[TMP7]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]] +// X64: [[ATOMIC_CONT]]: +// X64-NEXT: ret x86_fp80 [[TMP1]] // // X86-LABEL: define dso_local x86_fp80 @testdec( // X86-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] { -// X86-NEXT: [[ENTRY:.*:]] -// X86-NEXT: [[RETVAL:%.*]] = alloca x86_fp80, align 4 +// X86-NEXT: [[ENTRY:.*]]: // X86-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 4 +// X86-NEXT: [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 4 +// X86-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 4 +// X86-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca x86_fp80, align 4 // X86-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4 // X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4 -// X86-NEXT: [[TMP1:%.*]] = atomicrmw fsub ptr [[TMP0]], float 1.000000e+00 seq_cst, align 4 -// X86-NEXT: store float [[TMP1]], ptr [[RETVAL]], align 4 -// X86-NEXT: [[TMP2:%.*]] = load x86_fp80, ptr [[RETVAL]], align 4 -// X86-NEXT: ret x86_fp80 [[TMP2]] +// X86-NEXT: call void @__atomic_load(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 5) +// X86-NEXT: [[TMP1:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP]], align 4 +// X86-NEXT: br label %[[ATOMIC_OP:.*]] +// X86: [[ATOMIC_OP]]: +// X86-NEXT: [[TMP2:%.*]] = phi x86_fp80 [ [[TMP1]], %[[ENTRY]] ], [ [[TMP3:%.*]], %[[ATOMIC_OP]] ] +// X86-NEXT: [[DEC:%.*]] = fadd x86_fp80 [[TMP2]], 0xKBFFF8000000000000000 +// X86-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP1]], i8 0, i64 12, i1 false) +// X86-NEXT: store x86_fp80 [[TMP2]], ptr [[ATOMIC_TEMP1]], align 4 +// X86-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP2]], i8 0, i64 12, i1 false) +// X86-NEXT: store x86_fp80 [[DEC]], ptr [[ATOMIC_TEMP2]], align 4 +// X86-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP1]], ptr noundef [[ATOMIC_TEMP2]], i32 noundef 5, i32 noundef 5) +// X86-NEXT: [[TMP3]] = load x86_fp80, ptr [[ATOMIC_TEMP1]], align 4 +// X86-NEXT: br i1 [[CALL]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]] +// X86: [[ATOMIC_CONT]]: +// X86-NEXT: ret x86_fp80 [[TMP1]] // long double testdec(_Atomic long double *addr) { @@ -175,29 +239,60 @@ long double testassign(_Atomic long double *addr) { // X64-LABEL: define dso_local x86_fp80 @test_volatile_inc( // X64-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] { -// X64-NEXT: [[ENTRY:.*:]] -// X64-NEXT: [[RETVAL:%.*]] = alloca x86_fp80, align 16 +// X64-NEXT: [[ENTRY:.*]]: // X64-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 8 +// X64-NEXT: [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 16 +// X64-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 16 +// X64-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca x86_fp80, align 16 +// X64-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca x86_fp80, align 16 // X64-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8 // X64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8 -// X64-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr [[TMP0]], float 1.000000e+00 seq_cst, align 16 -// X64-NEXT: [[TMP2:%.*]] = fadd float [[TMP1]], 1.000000e+00 -// X64-NEXT: store float [[TMP2]], ptr [[RETVAL]], align 16 -// X64-NEXT: [[TMP3:%.*]] = load x86_fp80, ptr [[RETVAL]], align 16 -// X64-NEXT: ret x86_fp80 [[TMP3]] +// X64-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic volatile i128, ptr [[TMP0]] seq_cst, align 16 +// X64-NEXT: store i128 [[ATOMIC_LOAD]], ptr [[ATOMIC_TEMP]], align 16 +// X64-NEXT: 
[[TMP1:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP]], align 16 +// X64-NEXT: br label %[[ATOMIC_OP:.*]] +// X64: [[ATOMIC_OP]]: +// X64-NEXT: [[TMP2:%.*]] = phi x86_fp80 [ [[TMP1]], %[[ENTRY]] ], [ [[TMP8:%.*]], %[[ATOMIC_OP]] ] +// X64-NEXT: [[INC:%.*]] = fadd x86_fp80 [[TMP2]], 0xK3FFF8000000000000000 +// X64-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ATOMIC_TEMP1]], i8 0, i64 16, i1 false) +// X64-NEXT: store x86_fp80 [[TMP2]], ptr [[ATOMIC_TEMP1]], align 16 +// X64-NEXT: [[TMP3:%.*]] = load i128, ptr [[ATOMIC_TEMP1]], align 16 +// X64-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ATOMIC_TEMP2]], i8 0, i64 16, i1 false) +// X64-NEXT: store x86_fp80 [[INC]], ptr [[ATOMIC_TEMP2]], align 16 +// X64-NEXT: [[TMP4:%.*]] = load i128, ptr [[ATOMIC_TEMP2]], align 16 +// X64-NEXT: [[TMP5:%.*]] = cmpxchg volatile ptr [[TMP0]], i128 [[TMP3]], i128 [[TMP4]] seq_cst seq_cst, align 16 +// X64-NEXT: [[TMP6:%.*]] = extractvalue { i128, i1 } [[TMP5]], 0 +// X64-NEXT: [[TMP7:%.*]] = extractvalue { i128, i1 } [[TMP5]], 1 +// X64-NEXT: store i128 [[TMP6]], ptr [[ATOMIC_TEMP3]], align 16 +// X64-NEXT: [[TMP8]] = load x86_fp80, ptr [[ATOMIC_TEMP3]], align 16 +// X64-NEXT: br i1 [[TMP7]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]] +// X64: [[ATOMIC_CONT]]: +// X64-NEXT: ret x86_fp80 [[INC]] // // X86-LABEL: define dso_local x86_fp80 @test_volatile_inc( // X86-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] { -// X86-NEXT: [[ENTRY:.*:]] -// X86-NEXT: [[RETVAL:%.*]] = alloca x86_fp80, align 4 +// X86-NEXT: [[ENTRY:.*]]: // X86-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 4 +// X86-NEXT: [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 4 +// X86-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 4 +// X86-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca x86_fp80, align 4 // X86-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4 // X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4 -// X86-NEXT: [[TMP1:%.*]] = atomicrmw fadd ptr [[TMP0]], float 1.000000e+00 seq_cst, align 4 -// X86-NEXT: [[TMP2:%.*]] = fadd float [[TMP1]], 1.000000e+00 -// X86-NEXT: store float [[TMP2]], ptr [[RETVAL]], align 4 -// X86-NEXT: [[TMP3:%.*]] = load x86_fp80, ptr [[RETVAL]], align 4 -// X86-NEXT: ret x86_fp80 [[TMP3]] +// X86-NEXT: call void @__atomic_load(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 5) +// X86-NEXT: [[TMP1:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP]], align 4 +// X86-NEXT: br label %[[ATOMIC_OP:.*]] +// X86: [[ATOMIC_OP]]: +// X86-NEXT: [[TMP2:%.*]] = phi x86_fp80 [ [[TMP1]], %[[ENTRY]] ], [ [[TMP3:%.*]], %[[ATOMIC_OP]] ] +// X86-NEXT: [[INC:%.*]] = fadd x86_fp80 [[TMP2]], 0xK3FFF8000000000000000 +// X86-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP1]], i8 0, i64 12, i1 false) +// X86-NEXT: store x86_fp80 [[TMP2]], ptr [[ATOMIC_TEMP1]], align 4 +// X86-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP2]], i8 0, i64 12, i1 false) +// X86-NEXT: store x86_fp80 [[INC]], ptr [[ATOMIC_TEMP2]], align 4 +// X86-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP1]], ptr noundef [[ATOMIC_TEMP2]], i32 noundef 5, i32 noundef 5) +// X86-NEXT: [[TMP3]] = load x86_fp80, ptr [[ATOMIC_TEMP1]], align 4 +// X86-NEXT: br i1 [[CALL]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]] +// X86: [[ATOMIC_CONT]]: +// X86-NEXT: ret x86_fp80 [[INC]] // long double test_volatile_inc(volatile _Atomic long double *addr) { return ++*addr; @@ -205,27 +300,60 @@ long double test_volatile_inc(volatile _Atomic 
long double *addr) { // X64-LABEL: define dso_local x86_fp80 @test_volatile_dec( // X64-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] { -// X64-NEXT: [[ENTRY:.*:]] -// X64-NEXT: [[RETVAL:%.*]] = alloca x86_fp80, align 16 +// X64-NEXT: [[ENTRY:.*]]: // X64-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 8 +// X64-NEXT: [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 16 +// X64-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 16 +// X64-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca x86_fp80, align 16 +// X64-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca x86_fp80, align 16 // X64-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 8 // X64-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 8 -// X64-NEXT: [[TMP1:%.*]] = atomicrmw fsub ptr [[TMP0]], float 1.000000e+00 seq_cst, align 16 -// X64-NEXT: store float [[TMP1]], ptr [[RETVAL]], align 16 -// X64-NEXT: [[TMP2:%.*]] = load x86_fp80, ptr [[RETVAL]], align 16 -// X64-NEXT: ret x86_fp80 [[TMP2]] +// X64-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic volatile i128, ptr [[TMP0]] seq_cst, align 16 +// X64-NEXT: store i128 [[ATOMIC_LOAD]], ptr [[ATOMIC_TEMP]], align 16 +// X64-NEXT: [[TMP1:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP]], align 16 +// X64-NEXT: br label %[[ATOMIC_OP:.*]] +// X64: [[ATOMIC_OP]]: +// X64-NEXT: [[TMP2:%.*]] = phi x86_fp80 [ [[TMP1]], %[[ENTRY]] ], [ [[TMP8:%.*]], %[[ATOMIC_OP]] ] +// X64-NEXT: [[DEC:%.*]] = fadd x86_fp80 [[TMP2]], 0xKBFFF8000000000000000 +// X64-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ATOMIC_TEMP1]], i8 0, i64 16, i1 false) +// X64-NEXT: store x86_fp80 [[TMP2]], ptr [[ATOMIC_TEMP1]], align 16 +// X64-NEXT: [[TMP3:%.*]] = load i128, ptr [[ATOMIC_TEMP1]], align 16 +// X64-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ATOMIC_TEMP2]], i8 0, i64 16, i1 false) +// X64-NEXT: store x86_fp80 [[DEC]], ptr [[ATOMIC_TEMP2]], align 16 +// X64-NEXT: [[TMP4:%.*]] = load i128, ptr [[ATOMIC_TEMP2]], align 16 +// X64-NEXT: [[TMP5:%.*]] = cmpxchg volatile ptr [[TMP0]], i128 [[TMP3]], i128 [[TMP4]] seq_cst seq_cst, align 16 +// X64-NEXT: [[TMP6:%.*]] = extractvalue { i128, i1 } [[TMP5]], 0 +// X64-NEXT: [[TMP7:%.*]] = extractvalue { i128, i1 } [[TMP5]], 1 +// X64-NEXT: store i128 [[TMP6]], ptr [[ATOMIC_TEMP3]], align 16 +// X64-NEXT: [[TMP8]] = load x86_fp80, ptr [[ATOMIC_TEMP3]], align 16 +// X64-NEXT: br i1 [[TMP7]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]] +// X64: [[ATOMIC_CONT]]: +// X64-NEXT: ret x86_fp80 [[TMP1]] // // X86-LABEL: define dso_local x86_fp80 @test_volatile_dec( // X86-SAME: ptr noundef [[ADDR:%.*]]) #[[ATTR0]] { -// X86-NEXT: [[ENTRY:.*:]] -// X86-NEXT: [[RETVAL:%.*]] = alloca x86_fp80, align 4 +// X86-NEXT: [[ENTRY:.*]]: // X86-NEXT: [[ADDR_ADDR:%.*]] = alloca ptr, align 4 +// X86-NEXT: [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 4 +// X86-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 4 +// X86-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca x86_fp80, align 4 // X86-NEXT: store ptr [[ADDR]], ptr [[ADDR_ADDR]], align 4 // X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ADDR_ADDR]], align 4 -// X86-NEXT: [[TMP1:%.*]] = atomicrmw fsub ptr [[TMP0]], float 1.000000e+00 seq_cst, align 4 -// X86-NEXT: store float [[TMP1]], ptr [[RETVAL]], align 4 -// X86-NEXT: [[TMP2:%.*]] = load x86_fp80, ptr [[RETVAL]], align 4 -// X86-NEXT: ret x86_fp80 [[TMP2]] +// X86-NEXT: call void @__atomic_load(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP]], i32 noundef 5) +// X86-NEXT: [[TMP1:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP]], align 4 +// X86-NEXT: br label %[[ATOMIC_OP:.*]] +// X86: [[ATOMIC_OP]]: +// X86-NEXT: 
[[TMP2:%.*]] = phi x86_fp80 [ [[TMP1]], %[[ENTRY]] ], [ [[TMP3:%.*]], %[[ATOMIC_OP]] ] +// X86-NEXT: [[DEC:%.*]] = fadd x86_fp80 [[TMP2]], 0xKBFFF8000000000000000 +// X86-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP1]], i8 0, i64 12, i1 false) +// X86-NEXT: store x86_fp80 [[TMP2]], ptr [[ATOMIC_TEMP1]], align 4 +// X86-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP2]], i8 0, i64 12, i1 false) +// X86-NEXT: store x86_fp80 [[DEC]], ptr [[ATOMIC_TEMP2]], align 4 +// X86-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i32 noundef 12, ptr noundef [[TMP0]], ptr noundef [[ATOMIC_TEMP1]], ptr noundef [[ATOMIC_TEMP2]], i32 noundef 5, i32 noundef 5) +// X86-NEXT: [[TMP3]] = load x86_fp80, ptr [[ATOMIC_TEMP1]], align 4 +// X86-NEXT: br i1 [[CALL]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]] +// X86: [[ATOMIC_CONT]]: +// X86-NEXT: ret x86_fp80 [[TMP1]] // long double test_volatile_dec(volatile _Atomic long double *addr) { return (*addr)--; @@ -341,3 +469,64 @@ long double test_volatile_assign(volatile _Atomic long double *addr) { return *addr; } + +// X64-LABEL: define dso_local i32 @pr107054( +// X64-SAME: ) #[[ATTR0]] { +// X64-NEXT: [[ENTRY:.*]]: +// X64-NEXT: [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 16 +// X64-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 16 +// X64-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca x86_fp80, align 16 +// X64-NEXT: [[ATOMIC_TEMP3:%.*]] = alloca x86_fp80, align 16 +// X64-NEXT: [[ATOMIC_LOAD:%.*]] = load atomic i128, ptr @pr107054.n seq_cst, align 16 +// X64-NEXT: store i128 [[ATOMIC_LOAD]], ptr [[ATOMIC_TEMP]], align 16 +// X64-NEXT: [[TMP0:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP]], align 16 +// X64-NEXT: br label %[[ATOMIC_OP:.*]] +// X64: [[ATOMIC_OP]]: +// X64-NEXT: [[TMP1:%.*]] = phi x86_fp80 [ [[TMP0]], %[[ENTRY]] ], [ [[TMP7:%.*]], %[[ATOMIC_OP]] ] +// X64-NEXT: [[INC:%.*]] = fadd x86_fp80 [[TMP1]], 0xK3FFF8000000000000000 +// X64-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ATOMIC_TEMP1]], i8 0, i64 16, i1 false) +// X64-NEXT: store x86_fp80 [[TMP1]], ptr [[ATOMIC_TEMP1]], align 16 +// X64-NEXT: [[TMP2:%.*]] = load i128, ptr [[ATOMIC_TEMP1]], align 16 +// X64-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[ATOMIC_TEMP2]], i8 0, i64 16, i1 false) +// X64-NEXT: store x86_fp80 [[INC]], ptr [[ATOMIC_TEMP2]], align 16 +// X64-NEXT: [[TMP3:%.*]] = load i128, ptr [[ATOMIC_TEMP2]], align 16 +// X64-NEXT: [[TMP4:%.*]] = cmpxchg ptr @pr107054.n, i128 [[TMP2]], i128 [[TMP3]] seq_cst seq_cst, align 16 +// X64-NEXT: [[TMP5:%.*]] = extractvalue { i128, i1 } [[TMP4]], 0 +// X64-NEXT: [[TMP6:%.*]] = extractvalue { i128, i1 } [[TMP4]], 1 +// X64-NEXT: store i128 [[TMP5]], ptr [[ATOMIC_TEMP3]], align 16 +// X64-NEXT: [[TMP7]] = load x86_fp80, ptr [[ATOMIC_TEMP3]], align 16 +// X64-NEXT: br i1 [[TMP6]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]] +// X64: [[ATOMIC_CONT]]: +// X64-NEXT: [[CMP:%.*]] = fcmp oeq x86_fp80 [[INC]], 0xK3FFF8000000000000000 +// X64-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +// X64-NEXT: ret i32 [[CONV]] +// +// X86-LABEL: define dso_local i32 @pr107054( +// X86-SAME: ) #[[ATTR0]] { +// X86-NEXT: [[ENTRY:.*]]: +// X86-NEXT: [[ATOMIC_TEMP:%.*]] = alloca x86_fp80, align 4 +// X86-NEXT: [[ATOMIC_TEMP1:%.*]] = alloca x86_fp80, align 4 +// X86-NEXT: [[ATOMIC_TEMP2:%.*]] = alloca x86_fp80, align 4 +// X86-NEXT: call void @__atomic_load(i32 noundef 12, ptr noundef @pr107054.n, ptr noundef [[ATOMIC_TEMP]], i32 noundef 5) +// X86-NEXT: [[TMP0:%.*]] = load x86_fp80, ptr [[ATOMIC_TEMP]], align 4 +// 
X86-NEXT: br label %[[ATOMIC_OP:.*]] +// X86: [[ATOMIC_OP]]: +// X86-NEXT: [[TMP1:%.*]] = phi x86_fp80 [ [[TMP0]], %[[ENTRY]] ], [ [[TMP2:%.*]], %[[ATOMIC_OP]] ] +// X86-NEXT: [[INC:%.*]] = fadd x86_fp80 [[TMP1]], 0xK3FFF8000000000000000 +// X86-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP1]], i8 0, i64 12, i1 false) +// X86-NEXT: store x86_fp80 [[TMP1]], ptr [[ATOMIC_TEMP1]], align 4 +// X86-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[ATOMIC_TEMP2]], i8 0, i64 12, i1 false) +// X86-NEXT: store x86_fp80 [[INC]], ptr [[ATOMIC_TEMP2]], align 4 +// X86-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i32 noundef 12, ptr noundef @pr107054.n, ptr noundef [[ATOMIC_TEMP1]], ptr noundef [[ATOMIC_TEMP2]], i32 noundef 5, i32 noundef 5) +// X86-NEXT: [[TMP2]] = load x86_fp80, ptr [[ATOMIC_TEMP1]], align 4 +// X86-NEXT: br i1 [[CALL]], label %[[ATOMIC_CONT:.*]], label %[[ATOMIC_OP]] +// X86: [[ATOMIC_CONT]]: +// X86-NEXT: [[CMP:%.*]] = fcmp oeq x86_fp80 [[INC]], 0xK3FFF8000000000000000 +// X86-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +// X86-NEXT: ret i32 [[CONV]] +// +int pr107054() +{ + static _Atomic long double n; + return (++n) == 1; +} diff --git a/clang/test/Driver/debug-options-as.c b/clang/test/Driver/debug-options-as.c index c83c0cb90431..cb0492177ff4 100644 --- a/clang/test/Driver/debug-options-as.c +++ b/clang/test/Driver/debug-options-as.c @@ -19,12 +19,27 @@ // GGDB0-NOT: -debug-info-kind= // Check to make sure clang with -g on a .s file gets passed. -// RUN: %clang -### -c -integrated-as -g -x assembler %s 2>&1 \ +// This requires a target that defaults to DWARF. +// RUN: %clang -### --target=x86_64-linux-gnu -c -integrated-as -g -x assembler %s 2>&1 \ // RUN: | FileCheck %s // // CHECK: "-cc1as" // CHECK: "-debug-info-kind=constructor" +// Check that a plain -g, without any -gdwarf, for a MSVC target, doesn't +// trigger producing DWARF output. +// RUN: %clang -### --target=x86_64-windows-msvc -c -integrated-as -g -x assembler %s 2>&1 \ +// RUN: | FileCheck -check-prefix=MSVC %s +// +// MSVC: "-cc1as" +// MSVC-NOT: "-debug-info-kind=constructor" + +// Check that clang-cl with the -Z7 option works the same, not triggering +// any DWARF output. +// +// RUN: %clang_cl -### --target=x86_64-pc-windows-msvc -c -Z7 -x assembler -- %s 2>&1 \ +// RUN: | FileCheck -check-prefix=MSVC %s + // Check to make sure clang with -g on a .s file gets passed -dwarf-debug-producer. 
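[Editor's sketch] Returning to the x86-atomic-long_double.c checks above: the ATOMIC_OP/ATOMIC_CONT blocks they verify form a compare-exchange retry loop, which is what the new lowering falls back to because x86_fp80 is 80 bits wide and fails the power-of-2 size check. A rough source-level model of the semantics (not the compiler's actual output):

#include <atomic>

// ++x for an atomic long double: no single atomicrmw covers the 80-bit
// payload, so retry through compare_exchange until no other thread
// intervened between the load and the store.
long double pre_inc(std::atomic<long double> &X) {
  long double Old = X.load(std::memory_order_seq_cst);
  long double New = Old + 1.0L;
  while (!X.compare_exchange_weak(Old, New, std::memory_order_seq_cst,
                                  std::memory_order_seq_cst))
    New = Old + 1.0L; // Old was refreshed with the freshly observed value
  return New;         // pre-increment yields the updated value
}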
// RUN: %clang -### -c -integrated-as -g -x assembler %s 2>&1 \ // RUN: | FileCheck -check-prefix=P %s diff --git a/clang/test/Modules/pr107673.cppm b/clang/test/Modules/pr107673.cppm new file mode 100644 index 000000000000..dc66c9ac2245 --- /dev/null +++ b/clang/test/Modules/pr107673.cppm @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 -std=c++20 %s -ast-dump | FileCheck %s +export module a; +export class f { +public: + void non_inline_func() {} + constexpr void constexpr_func() {} + consteval void consteval_func() {} +}; + +// CHECK-NOT: non_inline_func {{.*}}implicit-inline +// CHECK: constexpr_func {{.*}}implicit-inline +// CHECK: consteval_func {{.*}}implicit-inline diff --git a/clang/test/Modules/pr108732.cppm b/clang/test/Modules/pr108732.cppm new file mode 100644 index 000000000000..f3b495aa826c --- /dev/null +++ b/clang/test/Modules/pr108732.cppm @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 -std=c++20 %s -ast-dump | FileCheck %s +export module mod; + +extern "C++" { +class C +{ +public: +bool foo() const { + return true; +} +}; +} + +// CHECK: foo {{.*}}implicit-inline diff --git a/clang/test/SemaTemplate/concepts-out-of-line-def.cpp b/clang/test/SemaTemplate/concepts-out-of-line-def.cpp index 0142efcdc3ee..333187b0d74a 100644 --- a/clang/test/SemaTemplate/concepts-out-of-line-def.cpp +++ b/clang/test/SemaTemplate/concepts-out-of-line-def.cpp @@ -599,3 +599,26 @@ template unsigned long DerivedCollection::index() {} } // namespace GH72557 + +namespace GH101735 { + +template +concept True = true; + +template +class A { + template + void method(Ts&... ts) + requires requires (T t) { + { t.method(static_cast(ts)...) } -> True; + }; +}; + +template +template +void A::method(Ts&... ts) + requires requires (T t) { + { t.method(static_cast(ts)...) } -> True; + } {} + +} diff --git a/clang/tools/clang-scan-deps/CMakeLists.txt b/clang/tools/clang-scan-deps/CMakeLists.txt index f0be6a546ff8..10bc0ff23c54 100644 --- a/clang/tools/clang-scan-deps/CMakeLists.txt +++ b/clang/tools/clang-scan-deps/CMakeLists.txt @@ -1,4 +1,5 @@ set(LLVM_LINK_COMPONENTS + ${LLVM_TARGETS_TO_BUILD} Core Option Support diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp index a8f6150dd349..867df19c863f 100644 --- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp +++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp @@ -15,6 +15,7 @@ #include "clang/Tooling/DependencyScanning/DependencyScanningTool.h" #include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h" #include "clang/Tooling/JSONCompilationDatabase.h" +#include "clang/Tooling/Tooling.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/CommandLine.h" @@ -24,6 +25,7 @@ #include "llvm/Support/LLVMDriver.h" #include "llvm/Support/Program.h" #include "llvm/Support/Signals.h" +#include "llvm/Support/TargetSelect.h" #include "llvm/Support/ThreadPool.h" #include "llvm/Support/Threading.h" #include "llvm/Support/Timer.h" @@ -795,6 +797,7 @@ getCompilationDatabase(int argc, char **argv, std::string &ErrorMessage) { } int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) { + llvm::InitializeAllTargetInfos(); std::string ErrorMessage; std::unique_ptr Compilations = getCompilationDatabase(argc, argv, ErrorMessage); @@ -810,6 +813,10 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) { Compilations = expandResponseFiles(std::move(Compilations), llvm::vfs::getRealFileSystem()); + Compilations = inferTargetAndDriverMode(std::move(Compilations)); + + 
Compilations = inferToolLocation(std::move(Compilations)); + // The command options are rewritten to run Clang in preprocessor only mode. auto AdjustingCompilations = std::make_unique( @@ -830,7 +837,12 @@ int clang_scan_deps_main(int argc, char **argv, const llvm::ToolContext &) { // Reverse scan, starting at the end or at the element before "--". auto R = std::make_reverse_iterator(FlagsEnd); - for (auto I = R, E = Args.rend(); I != E; ++I) { + auto E = Args.rend(); + // Don't include Args[0] in the iteration; that's the executable, not + // an option. + if (E != R) + E--; + for (auto I = R; I != E; ++I) { StringRef Arg = *I; if (ClangCLMode) { // Ignore arguments that are preceded by "-Xclang". diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 29200b72d3d0..b7d8fc8ea72c 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -27364,6 +27364,12 @@ TEST_F(FormatTest, InsertNewlineAtEOF) { verifyNoChange("int i;\n", Style); verifyFormat("int i;\n", "int i;", Style); + + constexpr StringRef Code{"namespace {\n" + "int i;\n" + "} // namespace"}; + verifyFormat(Code.str() + '\n', Code, Style, + {tooling::Range(19, 13)}); // line 3 } TEST_F(FormatTest, KeepEmptyLinesAtEOF) { diff --git a/cmake/Modules/LLVMVersion.cmake b/cmake/Modules/LLVMVersion.cmake index 928c6c439bd1..8bdcd828eb24 100644 --- a/cmake/Modules/LLVMVersion.cmake +++ b/cmake/Modules/LLVMVersion.cmake @@ -7,7 +7,7 @@ if(NOT DEFINED LLVM_VERSION_MINOR) set(LLVM_VERSION_MINOR 1) endif() if(NOT DEFINED LLVM_VERSION_PATCH) - set(LLVM_VERSION_PATCH 0) + set(LLVM_VERSION_PATCH 1) endif() if(NOT DEFINED LLVM_VERSION_SUFFIX) set(LLVM_VERSION_SUFFIX) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp index 648df0c4e5a7..b9b1f496df7c 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp @@ -2014,6 +2014,18 @@ SignalContext::WriteFlag SignalContext::GetWriteFlag() const { return Unknown; return esr & ESR_ELx_WNR ? Write : Read; # elif defined(__loongarch__) + // In the musl environment, the Linux kernel uapi sigcontext.h is not + // included in signal.h. To avoid missing the SC_ADDRERR_{RD,WR} macros, + // copy them here. The LoongArch Linux kernel uapi is already stable, + // so there's no need to worry about the value changing. +# ifndef SC_ADDRERR_RD + // Address error was due to memory load +# define SC_ADDRERR_RD (1 << 30) +# endif +# ifndef SC_ADDRERR_WR + // Address error was due to memory store +# define SC_ADDRERR_WR (1 << 31) +# endif u32 flags = ucontext->uc_mcontext.__flags; if (flags & SC_ADDRERR_RD) return SignalContext::Read; diff --git a/libcxx/include/__config b/libcxx/include/__config index 661af5be3c22..b2abd372929d 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -27,7 +27,7 @@ // _LIBCPP_VERSION represents the version of libc++, which matches the version of LLVM. // Given a LLVM release LLVM XX.YY.ZZ (e.g. LLVM 17.0.1 == 17.00.01), _LIBCPP_VERSION is // defined to XXYYZZ. 
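The clang-scan-deps hunk above fixes an off-by-one in the right-to-left scan of the command line: Args.rend() covers Args[0], the driver executable, so the reverse iteration must stop one element early or the executable gets classified as an option or input. The same guard in isolation (a sketch; lastNonOption is a stand-in, not the tool's code):

#include <string>
#include <vector>

const std::string *lastNonOption(const std::vector<std::string> &Args) {
  auto R = Args.rbegin(), E = Args.rend();
  if (R != E)
    --E; // exclude Args[0], the executable, from the scan
  for (auto I = R; I != E; ++I)
    if (!I->empty() && I->front() != '-')
      return &*I;
  return nullptr;
}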
-# define _LIBCPP_VERSION 190100 +# define _LIBCPP_VERSION 190101 # define _LIBCPP_CONCAT_IMPL(_X, _Y) _X##_Y # define _LIBCPP_CONCAT(_X, _Y) _LIBCPP_CONCAT_IMPL(_X, _Y) diff --git a/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/sys_info.zdump.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/sys_info.zdump.pass.cpp index 207f8e4df454..b474fe50083b 100644 --- a/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/sys_info.zdump.pass.cpp +++ b/libcxx/test/std/time/time.zone/time.zone.timezone/time.zone.members/sys_info.zdump.pass.cpp @@ -13,9 +13,6 @@ // XFAIL: libcpp-has-no-experimental-tzdb // XFAIL: availability-tzdb-missing -// TODO TZDB Investigate -// XFAIL: target={{armv(7|8)l-linux-gnueabihf}} - #include #include #include @@ -28,7 +25,7 @@ // The year range to validate. The dates used in practice are expected to be // inside the tested range. constexpr std::chrono::year first{1800}; -constexpr std::chrono::year last{2100}; +constexpr std::chrono::year last{sizeof(time_t) == 8 ? 2100 : 2037}; // A custom sys_info class that also stores the name of the time zone. // Its formatter matches the output of zdump. diff --git a/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt b/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt index f0289dc44c66..125b2184a49e 100644 --- a/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt +++ b/libcxx/test/tools/clang_tidy_checks/CMakeLists.txt @@ -1,3 +1,5 @@ +# TODO: Re-enable the tests once the CI is back under control +return() # The find_package changes these variables. This leaves the build in an odd # state. Calling cmake a second time tries to write site config information in diff --git a/libcxx/vendor/llvm/default_assertion_handler.in b/libcxx/vendor/llvm/default_assertion_handler.in index 3b6d6b2cca53..e12ccccdaff3 100644 --- a/libcxx/vendor/llvm/default_assertion_handler.in +++ b/libcxx/vendor/llvm/default_assertion_handler.in @@ -26,7 +26,8 @@ # if __has_builtin(__builtin_verbose_trap) // AppleClang shipped a slightly different version of __builtin_verbose_trap from the upstream // version before upstream Clang actually got the builtin. -# if defined(_LIBCPP_APPLE_CLANG_VER) && _LIBCPP_APPLE_CLANG_VER < 17000 +// TODO: Remove once AppleClang supports the two-arguments version of the builtin. +# if defined(_LIBCPP_APPLE_CLANG_VER) && _LIBCPP_APPLE_CLANG_VER < 1700 # define _LIBCPP_ASSERTION_HANDLER(message) __builtin_verbose_trap(message) # else # define _LIBCPP_ASSERTION_HANDLER(message) __builtin_verbose_trap("libc++", message) diff --git a/lld/ELF/ICF.cpp b/lld/ELF/ICF.cpp index 44e8a71cc628..5591c5e71e0b 100644 --- a/lld/ELF/ICF.cpp +++ b/lld/ELF/ICF.cpp @@ -324,7 +324,7 @@ bool ICF<ELFT>::equalsConstant(const InputSection *a, const InputSection *b) { const RelsOrRelas<ELFT> ra = a->template relsOrRelas<ELFT>(); const RelsOrRelas<ELFT> rb = b->template relsOrRelas<ELFT>(); - if (ra.areRelocsCrel()) + if (ra.areRelocsCrel() || rb.areRelocsCrel()) return constantEq(a, ra.crels, b, rb.crels); return ra.areRelocsRel() || rb.areRelocsRel() ? constantEq(a, ra.rels, b, rb.rels) : @@ -376,7 +376,7 @@ template <class ELFT> bool ICF<ELFT>::equalsVariable(const InputSection *a, const InputSection *b) { const RelsOrRelas<ELFT> ra = a->template relsOrRelas<ELFT>(); const RelsOrRelas<ELFT> rb = b->template relsOrRelas<ELFT>(); - if (ra.areRelocsCrel()) + if (ra.areRelocsCrel() || rb.areRelocsCrel()) return variableEq(a, ra.crels, b, rb.crels); return ra.areRelocsRel() || rb.areRelocsRel() ?
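The year clamp added to sys_info.zdump.pass.cpp above is a Y2038 guard: a signed 32-bit time_t overflows on 2038-01-19, so 2037 is the last year the test can validate end to end on such targets, while 64-bit time_t easily covers the 2100 bound. The boundary can be checked at compile time (a sketch, not part of the test):

#include <cstdint>
#include <ctime>

constexpr int lastFullYear = sizeof(std::time_t) == 8 ? 2100 : 2037;
// 2^31 - 1 seconds past 1970 lands in calendar year 2038.
static_assert(1970 + INT32_MAX / (365LL * 24 * 3600) == 2038, "Y2038 boundary");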
variableEq(a, ra.rels, b, rb.rels) diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 570e485455ba..a165c813d425 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -150,12 +150,12 @@ RelsOrRelas<ELFT> InputSectionBase::relsOrRelas(bool supportsCrel) const { InputSectionBase *const &relSec = f->getSections()[relSecIdx]; // Otherwise, allocate a buffer to hold the decoded RELA relocations. When // called for the first time, relSec is null (without --emit-relocs) or an - // InputSection with zero eqClass[0]. - if (!relSec || !cast<InputSection>(relSec)->eqClass[0]) { + // InputSection with false decodedCrel. + if (!relSec || !cast<InputSection>(relSec)->decodedCrel) { auto *sec = makeThreadLocal<InputSection>(*f, shdr, name); f->cacheDecodedCrel(relSecIdx, sec); sec->type = SHT_RELA; - sec->eqClass[0] = SHT_RELA; + sec->decodedCrel = true; RelocsCrel<ELFT::Is64Bits> entries(sec->content_); sec->size = entries.size() * sizeof(typename ELFT::Rela); diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h index 6659530a9c9c..afa6ee5bd082 100644 --- a/lld/ELF/InputSection.h +++ b/lld/ELF/InputSection.h @@ -176,6 +176,10 @@ class InputSectionBase : public SectionBase { mutable bool compressed = false; + // Whether this section is SHT_CREL and has been decoded to RELA by + // relsOrRelas. + bool decodedCrel = false; + // Whether the section needs to be padded with a NOP filler due to // deleteFallThruJmpInsn. bool nopFiller = false; diff --git a/lld/test/ELF/icf10.s b/lld/test/ELF/icf10.s index 3c18c431c3b9..ff926d0e16b1 100644 --- a/lld/test/ELF/icf10.s +++ b/lld/test/ELF/icf10.s @@ -5,6 +5,9 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64-pc-freebsd %s -o %t.o # RUN: ld.lld --icf=all %t.o -o /dev/null --print-icf-sections 2>&1 | FileCheck %s +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o --crel +# RUN: ld.lld --icf=all %t.o -o /dev/null --print-icf-sections 2>&1 | FileCheck %s + # Checks that ICF does not merge 2 sections the offset of # the relocations of which differ.
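The lld hunks above replace a borrowed marker with a dedicated one: relsOrRelas previously flagged an already-decoded SHT_CREL section by writing SHT_RELA into eqClass[0], a field ICF also uses for its equivalence classes, which is presumably why the new --crel RUN lines in icf10.s needed the separate decodedCrel bit. The decode-once pattern in isolation (types illustrative):

struct DecodedSection {
  bool decodedCrel = false; // set once the CREL data has been expanded
};

DecodedSection *getOrDecode(DecodedSection *sec) {
  if (!sec->decodedCrel) {
    // expand the compact relocations into a RELA buffer here (elided)
    sec->decodedCrel = true;
  }
  return sec;
}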
diff --git a/lldb/test/API/commands/expression/import-std-module/deque-dbg-info-content/Makefile b/lldb/test/API/commands/expression/import-std-module/deque-dbg-info-content/Makefile index 98638c56f0b9..f938f7428468 100644 --- a/lldb/test/API/commands/expression/import-std-module/deque-dbg-info-content/Makefile +++ b/lldb/test/API/commands/expression/import-std-module/deque-dbg-info-content/Makefile @@ -1,5 +1,3 @@ -# FIXME: once the expression evaluator can handle std libraries with debug -# info, change this to USE_LIBCPP=1 -USE_SYSTEM_STDLIB := 1 +USE_LIBCPP := 1 CXX_SOURCES := main.cpp include Makefile.rules diff --git a/lldb/test/API/commands/expression/import-std-module/list-dbg-info-content/Makefile b/lldb/test/API/commands/expression/import-std-module/list-dbg-info-content/Makefile index 98638c56f0b9..f938f7428468 100644 --- a/lldb/test/API/commands/expression/import-std-module/list-dbg-info-content/Makefile +++ b/lldb/test/API/commands/expression/import-std-module/list-dbg-info-content/Makefile @@ -1,5 +1,3 @@ -# FIXME: once the expression evaluator can handle std libraries with debug -# info, change this to USE_LIBCPP=1 -USE_SYSTEM_STDLIB := 1 +USE_LIBCPP := 1 CXX_SOURCES := main.cpp include Makefile.rules diff --git a/lldb/test/API/commands/expression/import-std-module/vector-dbg-info-content/Makefile b/lldb/test/API/commands/expression/import-std-module/vector-dbg-info-content/Makefile index 98638c56f0b9..f938f7428468 100644 --- a/lldb/test/API/commands/expression/import-std-module/vector-dbg-info-content/Makefile +++ b/lldb/test/API/commands/expression/import-std-module/vector-dbg-info-content/Makefile @@ -1,5 +1,3 @@ -# FIXME: once the expression evaluator can handle std libraries with debug -# info, change this to USE_LIBCPP=1 -USE_SYSTEM_STDLIB := 1 +USE_LIBCPP := 1 CXX_SOURCES := main.cpp include Makefile.rules diff --git a/lldb/test/API/commands/expression/import-std-module/vector-dbg-info-content/TestDbgInfoContentVectorFromStdModule.py b/lldb/test/API/commands/expression/import-std-module/vector-dbg-info-content/TestDbgInfoContentVectorFromStdModule.py index 397ac6a14cca..1c32222e64f1 100644 --- a/lldb/test/API/commands/expression/import-std-module/vector-dbg-info-content/TestDbgInfoContentVectorFromStdModule.py +++ b/lldb/test/API/commands/expression/import-std-module/vector-dbg-info-content/TestDbgInfoContentVectorFromStdModule.py @@ -13,6 +13,7 @@ class TestDbgInfoContentVector(TestBase): @skipIf(compiler=no_match("clang")) @skipIf(compiler="clang", compiler_version=["<", "12.0"]) @skipIf(macos_version=["<", "14.0"]) + @skipIfDarwin # https://github.com/llvm/llvm-project/issues/106475 def test(self): self.build() diff --git a/llvm/include/llvm/Analysis/AliasAnalysis.h b/llvm/include/llvm/Analysis/AliasAnalysis.h index 4140387a1f34..1b5a6ee24b86 100644 --- a/llvm/include/llvm/Analysis/AliasAnalysis.h +++ b/llvm/include/llvm/Analysis/AliasAnalysis.h @@ -320,7 +320,7 @@ class AAResults { public: // Make these results default constructable and movable. We have to spell // these out because MSVC won't synthesize them. 
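The AAResults change that follows turns an inline constructor into a plain declaration, with the body moved to AliasAnalysis.cpp; the general effect of that pattern is a single emitted definition instead of one in every translation unit that includes the header. In miniature (names are illustrative, and the motivation stated here is the usual one for this pattern, not taken from the patch):

struct Registry;
struct Results {
  Results(Registry &R); // declared in the header...
  Registry &R;
};
// ...defined once, out of line, in the .cpp file:
Results::Results(Registry &R) : R(R) {}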
- AAResults(const TargetLibraryInfo &TLI) : TLI(TLI) {} + AAResults(const TargetLibraryInfo &TLI); AAResults(AAResults &&Arg); ~AAResults(); diff --git a/llvm/lib/Analysis/AliasAnalysis.cpp b/llvm/lib/Analysis/AliasAnalysis.cpp index 6eaaad5f332e..9cdb315b6088 100644 --- a/llvm/lib/Analysis/AliasAnalysis.cpp +++ b/llvm/lib/Analysis/AliasAnalysis.cpp @@ -73,6 +73,8 @@ static cl::opt<bool> EnableAATrace("aa-trace", cl::Hidden, cl::init(false)); static const bool EnableAATrace = false; #endif +AAResults::AAResults(const TargetLibraryInfo &TLI) : TLI(TLI) {} + AAResults::AAResults(AAResults &&Arg) : TLI(Arg.TLI), AAs(std::move(Arg.AAs)), AADeps(std::move(Arg.AADeps)) {} diff --git a/llvm/lib/CodeGen/InitUndef.cpp b/llvm/lib/CodeGen/InitUndef.cpp index 51c50ff872ef..8f25ede0eb2b 100644 --- a/llvm/lib/CodeGen/InitUndef.cpp +++ b/llvm/lib/CodeGen/InitUndef.cpp @@ -272,6 +272,7 @@ bool InitUndef::runOnMachineFunction(MachineFunction &MF) { for (auto *DeadMI : DeadInsts) DeadMI->eraseFromParent(); DeadInsts.clear(); + NewRegs.clear(); return Changed; } diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 71cdec91e5f6..7b1f1dc40211 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -191,6 +191,11 @@ namespace { // AA - Used for DAG load/store alias analysis. AliasAnalysis *AA; + /// This caches all chains that have already been processed in + /// DAGCombiner::getStoreMergeCandidates() and found to have no mergeable + /// store candidates. + SmallPtrSet<SDNode *, 4> ChainsWithoutMergeableStores; + /// When an instruction is simplified, add all users of the instruction to /// the work lists because they might get more simplified now. void AddUsersToWorklist(SDNode *N) { @@ -776,11 +781,10 @@ namespace { bool UseTrunc); /// This is a helper function for mergeConsecutiveStores. Stores that - /// potentially may be merged with St are placed in StoreNodes. RootNode is - /// a chain predecessor to all store candidates. - void getStoreMergeCandidates(StoreSDNode *St, - SmallVectorImpl<MemOpLink> &StoreNodes, - SDNode *&Root); + /// potentially may be merged with St are placed in StoreNodes. On success, + /// returns a chain predecessor to all store candidates. + SDNode *getStoreMergeCandidates(StoreSDNode *St, + SmallVectorImpl<MemOpLink> &StoreNodes); /// Helper function for mergeConsecutiveStores. Checks if candidate stores /// have indirect dependency through their operands. RootNode is the @@ -1782,6 +1786,9 @@ void DAGCombiner::Run(CombineLevel AtLevel) { ++NodesCombined; + // Invalidate cached info. + ChainsWithoutMergeableStores.clear(); + // If we get back the same node we passed in, rather than a new node or // zero, we know that the node must have defined multiple values and // CombineTo was used. Since CombineTo takes care of the worklist @@ -20372,15 +20379,15 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts( return true; } -void DAGCombiner::getStoreMergeCandidates( - StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes, - SDNode *&RootNode) { +SDNode * +DAGCombiner::getStoreMergeCandidates(StoreSDNode *St, + SmallVectorImpl<MemOpLink> &StoreNodes) { // This holds the base pointer, index, and the offset in bytes from the base // pointer. We must have a base and an offset. Do not handle stores to undef // base pointers.
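ChainsWithoutMergeableStores, declared above, memoizes store-chain roots whose candidate search already came up empty; DAGCombiner::Run clears it after every successful combine because rewriting the DAG can make a previously fruitless root fruitful. The shape of the optimization as a standalone sketch (Node and searchAndMerge are stand-ins for the real types and search):

#include <unordered_set>

struct Node;
bool searchAndMerge(const Node *Root); // stand-in for the candidate walk

std::unordered_set<const Node *> ChainsWithoutMergeableStores;

bool tryMergeStores(const Node *Root) {
  if (ChainsWithoutMergeableStores.count(Root))
    return false; // known fruitless: skip the expensive walk
  bool Changed = searchAndMerge(Root);
  if (!Changed)
    ChainsWithoutMergeableStores.insert(Root); // remember the failure
  return Changed;
}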
BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG); if (!BasePtr.getBase().getNode() || BasePtr.getBase().isUndef()) - return; + return nullptr; SDValue Val = peekThroughBitcasts(St->getValue()); StoreSource StoreSrc = getStoreSource(Val); @@ -20396,14 +20403,14 @@ void DAGCombiner::getStoreMergeCandidates( LoadVT = Ld->getMemoryVT(); // Load and store should be the same type. if (MemVT != LoadVT) - return; + return nullptr; // Loads must only have one use. if (!Ld->hasNUsesOfValue(1, 0)) - return; + return nullptr; // The memory operands must not be volatile/indexed/atomic. // TODO: May be able to relax for unordered atomics (see D66309) if (!Ld->isSimple() || Ld->isIndexed()) - return; + return nullptr; } auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr, int64_t &Offset) -> bool { @@ -20471,6 +20478,27 @@ void DAGCombiner::getStoreMergeCandidates( return (BasePtr.equalBaseIndex(Ptr, DAG, Offset)); }; + // We are looking for a root node which is an ancestor to all mergeable + // stores. We search up through a load, to our root and then down + // through all children. For instance we will find Store{1,2,3} if + // St is Store1, Store2, or Store3 where the root is not a load, + // which is always true for nonvolatile ops. TODO: Expand + // the search to find all valid candidates through multiple layers of loads. + // + // Root + // |-------|-------| + // Load Load Store3 + // | | + // Store1 Store2 + // + // FIXME: We should be able to climb and + // descend TokenFactors to find candidates as well. + + SDNode *RootNode = St->getChain().getNode(); + // Bail out if we already analyzed this root node and found nothing. + if (ChainsWithoutMergeableStores.contains(RootNode)) + return nullptr; + // Check if the pair of StoreNode and the RootNode already bail out many // times which is over the limit in dependence check. auto OverLimitInDependenceCheck = [&](SDNode *StoreNode, @@ -20494,28 +20522,13 @@ void DAGCombiner::getStoreMergeCandidates( } }; - // We looking for a root node which is an ancestor to all mergable - // stores. We search up through a load, to our root and then down - // through all children. For instance we will find Store{1,2,3} if - // St is Store1, Store2. or Store3 where the root is not a load - // which always true for nonvolatile ops. TODO: Expand - // the search to find all valid candidates through multiple layers of loads. - // - // Root - // |-------|-------| - // Load Load Store3 - // | | - // Store1 Store2 - // - // FIXME: We should be able to climb and - // descend TokenFactors to find candidates as well. - - RootNode = St->getChain().getNode(); - unsigned NumNodesExplored = 0; const unsigned MaxSearchNodes = 1024; if (auto *Ldn = dyn_cast<LoadSDNode>(RootNode)) { RootNode = Ldn->getChain().getNode(); + // Bail out if we already analyzed this root node and found nothing.
+ if (ChainsWithoutMergeableStores.contains(RootNode)) + return nullptr; for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) { if (I.getOperandNo() == 0 && isa(*I)) { // walk down chain @@ -20532,6 +20545,8 @@ void DAGCombiner::getStoreMergeCandidates( I != E && NumNodesExplored < MaxSearchNodes; ++I, ++NumNodesExplored) TryToAddCandidate(I); } + + return RootNode; } // We need to check that merging these stores does not cause a loop in the @@ -21162,9 +21177,8 @@ bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) { return false; SmallVector StoreNodes; - SDNode *RootNode; // Find potential store merge candidates by searching through chain sub-DAG - getStoreMergeCandidates(St, StoreNodes, RootNode); + SDNode *RootNode = getStoreMergeCandidates(St, StoreNodes); // Check if there is anything to merge. if (StoreNodes.size() < 2) @@ -21220,6 +21234,11 @@ bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) { llvm_unreachable("Unhandled store source type"); } } + + // Remember if we failed to optimize, to save compile time. + if (!MadeChange) + ChainsWithoutMergeableStores.insert(RootNode); + return MadeChange; } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 7f5b46af01c6..4b25f553ffae 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2190,7 +2190,8 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, Results.push_back(Tmp.first); Results.push_back(Tmp.second); } else { - SDValue Tmp = ExpandLibCall(LC, Node, false).first; + bool IsSignedArgument = Node->getOpcode() == ISD::FLDEXP; + SDValue Tmp = ExpandLibCall(LC, Node, IsSignedArgument).first; Results.push_back(Tmp); } } diff --git a/llvm/lib/ExecutionEngine/Orc/Core.cpp b/llvm/lib/ExecutionEngine/Orc/Core.cpp index 3e6de62c8b7d..f70c2890521d 100644 --- a/llvm/lib/ExecutionEngine/Orc/Core.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Core.cpp @@ -3592,6 +3592,21 @@ ExecutionSession::IL_failSymbols(JITDylib &JD, assert(MI.DefiningEDU->Symbols.count(NonOwningSymbolStringPtr(Name)) && "Symbol does not appear in its DefiningEDU"); MI.DefiningEDU->Symbols.erase(NonOwningSymbolStringPtr(Name)); + + // Remove this EDU from the dependants lists of its dependencies. 
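The loop added below restores a two-way bookkeeping invariant: an EDU records its Dependencies, and each dependency's MaterializingInfo records the EDU back in DependantEDUs, so clearing DefiningEDU without erasing the back-edges would leave the dependencies pointing at a dead EDU. Reduced to its core (types are illustrative):

#include <set>

struct EDU;
struct SymbolInfo {
  std::set<EDU *> DependantEDUs; // back-edges
};
struct EDU {
  std::set<SymbolInfo *> Dependencies; // forward edges
};

void detach(EDU &E) {
  for (SymbolInfo *S : E.Dependencies)
    S->DependantEDUs.erase(&E); // drop every back-edge first
  E.Dependencies.clear();       // then the forward edges
}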
+ for (auto &[DepJD, DepSyms] : MI.DefiningEDU->Dependencies) { + for (auto DepSym : DepSyms) { + assert(DepJD->Symbols.count(SymbolStringPtr(DepSym)) && + "DepSym not in DepJD"); + assert(DepJD->MaterializingInfos.count(SymbolStringPtr(DepSym)) && + "DepSym has no MaterializingInfo"); + auto &SymMI = DepJD->MaterializingInfos[SymbolStringPtr(DepSym)]; + assert(SymMI.DependantEDUs.count(MI.DefiningEDU.get()) && + "DefiningEDU missing from DependantEDUs list of dependency"); + SymMI.DependantEDUs.erase(MI.DefiningEDU.get()); + } + } + MI.DefiningEDU = nullptr; } else { // Otherwise if there are any EDUs waiting on this symbol then move diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index b7471bab1285..7b786ee26417 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -1911,7 +1911,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr, 0); } - Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i16); + Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i32); return true; } @@ -1967,7 +1967,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr, return false; if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, SplitImmOffset)) return false; - Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16); + Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i32); return true; } } @@ -2000,7 +2000,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr, if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset)) return false; SAddr = SelectSAddrFI(CurDAG, SAddr); - Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16); + Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32); return true; } diff --git a/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp b/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp index 77db876d47e4..a8927d834630 100644 --- a/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp +++ b/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp @@ -122,8 +122,13 @@ bool AVRDAGToDAGISel::SelectAddr(SDNode *Op, SDValue N, SDValue &Base, // offset allowed. MVT VT = cast<MemSDNode>(Op)->getMemoryVT().getSimpleVT(); - // We only accept offsets that fit in 6 bits (unsigned). - if (isUInt<6>(RHSC) && (VT == MVT::i8 || VT == MVT::i16)) { + // We only accept offsets that fit in 6 bits (unsigned), with the exception + // of 16-bit loads - those can only go up to 62, because we desugar them + // into a pair of 8-bit loads like `ldd rx, RHSC` + `ldd ry, RHSC + 1`.
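The AVR comment above encodes the ldd/std constraint that the OkI8/OkI16 check which follows enforces: the Z+q addressing form has a 6-bit displacement (0..63), and a 16-bit access is lowered to two byte accesses at q and q+1, so its base displacement must stop at 62. As a predicate (a sketch, not the backend's code):

// q is the candidate displacement, accessBytes the access width in bytes.
bool fitsLddDisplacement(unsigned q, unsigned accessBytes) {
  if (accessBytes == 1)
    return q <= 63; // single ldd/std, full 6-bit range
  if (accessBytes == 2)
    return q <= 62; // byte ops at q and q+1, so q+1 must also fit
  return false;     // wider accesses are split up before this check
}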
+ bool OkI8 = VT == MVT::i8 && RHSC <= 63; + bool OkI16 = VT == MVT::i16 && RHSC <= 62; + + if (OkI8 || OkI16) { Base = N.getOperand(0); Disp = CurDAG->getTargetConstant(RHSC, dl, MVT::i8); diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index d80509cf3984..082b42398c6a 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -295,6 +295,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::CONCAT_VECTORS, VT, Legal); setOperationAction(ISD::SETCC, VT, Legal); setOperationAction(ISD::VSELECT, VT, Legal); @@ -5600,8 +5601,9 @@ bool LoongArchTargetLowering::shouldInsertFencesForAtomic( // On LA64, atomic store operations with IntegerBitWidth of 32 and 64 do not // require fences beacuse we can use amswap_db.[w/d]. - if (isa<StoreInst>(I)) { - unsigned Size = I->getOperand(0)->getType()->getIntegerBitWidth(); + Type *Ty = I->getOperand(0)->getType(); + if (isa<StoreInst>(I) && Ty->isIntegerTy()) { + unsigned Size = Ty->getIntegerBitWidth(); return (Size == 8 || Size == 16); } diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td index ef647a427787..339d50bd8192 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td @@ -1065,10 +1065,13 @@ def RDTIME_D : RDTIME_2R<0x00006800>; /// Generic pattern classes +def assertsexti32 : PatFrag<(ops node:$src), (assertsext node:$src), [{ + return cast<VTSDNode>(N->getOperand(1))->getVT().bitsLE(MVT::i32); +}]>; class PatGprGpr<SDPatternOperator OpNode, LAInst Inst> : Pat<(OpNode GPR:$rj, GPR:$rk), (Inst GPR:$rj, GPR:$rk)>; class PatGprGpr_32<SDPatternOperator OpNode, LAInst Inst> - : Pat<(sext_inreg (OpNode GPR:$rj, GPR:$rk), i32), (Inst GPR:$rj, GPR:$rk)>; + : Pat<(sext_inreg (OpNode (assertsexti32 GPR:$rj), (assertsexti32 GPR:$rk)), i32), (Inst GPR:$rj, GPR:$rk)>; class PatGpr<SDPatternOperator OpNode, LAInst Inst> : Pat<(OpNode GPR:$rj), (Inst GPR:$rj)>; diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td index 6f1969bf8cae..0a220a0319bc 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td @@ -1789,6 +1789,12 @@ def : Pat<(v4i32 (fp_to_uint (v4f64 LASX256:$vj))), v4f64:$vj)), sub_128)>; +// XVPERMI_Q +foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64] in +def : Pat<(vt (concat_vectors LSX128:$vd, LSX128:$vj)), + (XVPERMI_Q (SUBREG_TO_REG (i64 0), LSX128:$vd, sub_128), + (SUBREG_TO_REG (i64 0), LSX128:$vj, sub_128), 2)>; + } // Predicates = [HasExtLASX] /// Intrinsic pattern diff --git a/llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp b/llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp index abac69054f3b..ab90409fdf47 100644 --- a/llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchOptWInstrs.cpp @@ -637,6 +637,19 @@ static bool isSignExtendedW(Register SrcReg, const LoongArchSubtarget &ST, break; } return false; + // If all incoming values are sign-extended and all users only use + // the lower 32 bits, then convert them to W versions.
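The DIV_D case added below is deliberately stricter than ADD_D's handling: the low 32 bits of a sum depend only on the low 32 bits of its operands, but the low 32 bits of a 64-bit quotient do not, so narrowing DIV_D to DIV_W is sound only when both inputs are already sign-extended 32-bit values (the same fact the assertsexti32 guard above bakes into PatGprGpr_32). A small illustration (assumes b != 0 and no INT32_MIN / -1 overflow):

#include <cstdint>

// True only when a and b are sign-extended 32-bit values. For example,
// a = 1LL << 32, b = 3: the 64-bit quotient's low half is 0x55555555,
// while the 32-bit division 0 / 3 gives 0.
bool divNarrowingMatches(int64_t a, int64_t b) {
  return static_cast<int32_t>(a / b) ==
         static_cast<int32_t>(a) / static_cast<int32_t>(b);
}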
+ case LoongArch::DIV_D: { + if (!AddRegToWorkList(MI->getOperand(1).getReg())) + return false; + if (!AddRegToWorkList(MI->getOperand(2).getReg())) + return false; + if (hasAllWUsers(*MI, ST, MRI)) { + FixableDef.insert(MI); + break; + } + return false; + } } } @@ -651,6 +664,8 @@ static unsigned getWOp(unsigned Opcode) { return LoongArch::ADDI_W; case LoongArch::ADD_D: return LoongArch::ADD_W; + case LoongArch::DIV_D: + return LoongArch::DIV_W; case LoongArch::LD_D: case LoongArch::LD_WU: return LoongArch::LD_W; diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index c7f88fed9b12..efbcb57add98 100644 --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -2707,7 +2707,8 @@ bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) { bool MaybeDirectBranchDest = true; if (Parser.isParsingMasm()) { - if (is64BitMode() && SM.getElementSize() > 0) { + if (is64BitMode() && + ((PtrInOperand && !IndexReg) || SM.getElementSize() > 0)) { DefaultBaseReg = X86::RIP; } if (IsUnconditionalBranch) { diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 5a9d679d7002..45989bcd07d3 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -3415,7 +3415,7 @@ unsigned X86TargetLowering::preferedOpcodeForCmpEqPiecesOfOperand( // We prefer rotate for vectors of if we won't get a zext mask with SRL // (PreferRotate will be set in the latter case). - if (PreferRotate || VT.isVector()) + if (PreferRotate || !MayTransformRotate || VT.isVector()) return ShiftOpc; // Non-vector type and we have a zext mask with SRL. diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index da690aea43f5..cc1f9090c11a 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -2625,11 +2625,11 @@ multiclass avx512_mask_mov<bits<8> opc_kk, bits<8> opc_km, bits<8> opc_mk, def km#Suffix : I, - Sched<[WriteLoad]>; + Sched<[WriteLoad]>, NoCD8; def mk#Suffix : I, - Sched<[WriteStore]>; + Sched<[WriteStore]>, NoCD8; } multiclass avx512_mask_mov_gpr<bits<8> opc_kr, bits<8> opc_rk, diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index 9c9fc7a49a9d..68696789530f 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -1349,7 +1349,8 @@ static bool MayContainThrowingOrExitingCallAfterCB(CallBase *Begin, // Add attributes from CB params and Fn attributes that can always be propagated // to the corresponding argument / inner callbases. static void AddParamAndFnBasicAttributes(const CallBase &CB, - ValueToValueMapTy &VMap) { + ValueToValueMapTy &VMap, + ClonedCodeInfo &InlinedFunctionInfo) { auto *CalledFunction = CB.getCalledFunction(); auto &Context = CalledFunction->getContext(); @@ -1380,6 +1381,11 @@ static void AddParamAndFnBasicAttributes(const CallBase &CB, auto *NewInnerCB = dyn_cast_or_null<CallBase>(VMap.lookup(InnerCB)); if (!NewInnerCB) continue; + // The InnerCB might have been simplified during the inlining + // process, which can make propagation incorrect. + if (InlinedFunctionInfo.isSimplified(InnerCB, NewInnerCB)) + continue; + AttributeList AL = NewInnerCB->getAttributes(); for (unsigned I = 0, E = InnerCB->arg_size(); I < E; ++I) { // Check if the underlying value for the parameter is an argument.
@@ -1455,7 +1461,8 @@ static AttrBuilder IdentifyValidPoisonGeneratingAttributes(CallBase &CB) { return Valid; } -static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap) { +static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap, + ClonedCodeInfo &InlinedFunctionInfo) { AttrBuilder ValidUB = IdentifyValidUBGeneratingAttributes(CB); AttrBuilder ValidPG = IdentifyValidPoisonGeneratingAttributes(CB); if (!ValidUB.hasAttributes() && !ValidPG.hasAttributes()) @@ -1474,6 +1481,11 @@ static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap) { auto *NewRetVal = dyn_cast_or_null<CallBase>(VMap.lookup(RetVal)); if (!NewRetVal) continue; + + // The RetVal might have been simplified during the inlining + // process, which can make propagation incorrect. + if (InlinedFunctionInfo.isSimplified(RetVal, NewRetVal)) + continue; // Backward propagation of attributes to the returned value may be incorrect // if it is control flow dependent. // Consider: @@ -2456,11 +2468,11 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, // Clone return attributes on the callsite into the calls within the inlined // function which feed into its return value. - AddReturnAttributes(CB, VMap); + AddReturnAttributes(CB, VMap, InlinedFunctionInfo); // Clone attributes on the params of the callsite to calls within the // inlined function which use the same param. - AddParamAndFnBasicAttributes(CB, VMap); + AddParamAndFnBasicAttributes(CB, VMap, InlinedFunctionInfo); propagateMemProfMetadata(CalledFunc, CB, InlinedFunctionInfo.ContainsMemProfMetadata, VMap); diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp index 5d7c0d947fac..760f1619e030 100644 --- a/llvm/lib/Transforms/Utils/LoopPeel.cpp +++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp @@ -859,7 +859,7 @@ static void cloneLoopBlocks( if (LatchInst && L->contains(LatchInst)) LatchVal = VMap[LatchVal]; PHI.addIncoming(LatchVal, cast<BasicBlock>(VMap[Edge.first])); - SE.forgetValue(&PHI); + SE.forgetLcssaPhiWithNewPredecessor(L, &PHI); } // LastValueMap is updated with the values for the current loop diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll index a5e4151bf369..47ca6f416b02 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch.ll @@ -1513,4 +1513,243 @@ bb: ret void } +define amdgpu_gs void @sgpr_base_large_offset(ptr addrspace(1) %out, ptr addrspace(5) inreg %sgpr_base) { +; GFX9-LABEL: sgpr_base_large_offset: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5 +; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 +; GFX9-NEXT: s_add_u32 s0, s2, 0xffe8 +; GFX9-NEXT: scratch_load_dword v2, off, s0 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dword v[0:1], v2, off +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: sgpr_base_large_offset: +; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: s_add_u32 s0, s0, s5 +; GFX10-NEXT: s_addc_u32 s1, s1, 0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 +; GFX10-NEXT: s_add_u32 s0, s2, 0xffe8 +; GFX10-NEXT: scratch_load_dword v2, off, s0 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_dword v[0:1], v2, off +; GFX10-NEXT: s_endpgm +; +; GFX940-LABEL: sgpr_base_large_offset: +; GFX940: ; %bb.0: ; %entry +; GFX940-NEXT: s_add_u32 s0, s0, 0xffe8 +; GFX940-NEXT: scratch_load_dword v2, off, s0 +; GFX940-NEXT: s_waitcnt vmcnt(0)
+; GFX940-NEXT: global_store_dword v[0:1], v2, off sc0 sc1 +; GFX940-NEXT: s_endpgm +; +; GFX11-LABEL: sgpr_base_large_offset: +; GFX11: ; %bb.0: ; %entry +; GFX11-NEXT: s_add_u32 s0, s0, 0xffe8 +; GFX11-NEXT: scratch_load_b32 v2, off, s0 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b32 v[0:1], v2, off +; GFX11-NEXT: s_nop 0 +; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: sgpr_base_large_offset: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: scratch_load_b32 v2, off, s0 offset:65512 +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_store_b32 v[0:1], v2, off +; GFX12-NEXT: s_nop 0 +; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX12-NEXT: s_endpgm +entry: + %large_offset = getelementptr i8, ptr addrspace(5) %sgpr_base, i32 65512 + %load = load i32, ptr addrspace(5) %large_offset, align 4 + store i32 %load, ptr addrspace(1) %out + ret void +} + +define amdgpu_gs void @sgpr_base_large_offset_split(ptr addrspace(1) %out, ptr addrspace(5) inreg %sgpr_base) { +; GFX9-LABEL: sgpr_base_large_offset_split: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5 +; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 +; GFX9-NEXT: s_and_b32 s0, s2, -4 +; GFX9-NEXT: s_add_u32 s0, s0, 0x100ffe8 +; GFX9-NEXT: scratch_load_dword v2, off, s0 glc +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dword v[0:1], v2, off +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: sgpr_base_large_offset_split: +; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: s_add_u32 s0, s0, s5 +; GFX10-NEXT: s_addc_u32 s1, s1, 0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 +; GFX10-NEXT: s_and_b32 s0, s2, -4 +; GFX10-NEXT: s_add_u32 s0, s0, 0x100ffe8 +; GFX10-NEXT: scratch_load_dword v2, off, s0 glc dlc +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_dword v[0:1], v2, off +; GFX10-NEXT: s_endpgm +; +; GFX940-LABEL: sgpr_base_large_offset_split: +; GFX940: ; %bb.0: ; %entry +; GFX940-NEXT: s_and_b32 s0, s0, -4 +; GFX940-NEXT: s_add_u32 s0, s0, 0x100ffe8 +; GFX940-NEXT: scratch_load_dword v2, off, s0 sc0 sc1 +; GFX940-NEXT: s_waitcnt vmcnt(0) +; GFX940-NEXT: global_store_dword v[0:1], v2, off sc0 sc1 +; GFX940-NEXT: s_endpgm +; +; GFX11-LABEL: sgpr_base_large_offset_split: +; GFX11: ; %bb.0: ; %entry +; GFX11-NEXT: s_and_b32 s0, s0, -4 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_add_u32 s0, s0, 0x100ffe8 +; GFX11-NEXT: scratch_load_b32 v2, off, s0 glc dlc +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b32 v[0:1], v2, off +; GFX11-NEXT: s_nop 0 +; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: sgpr_base_large_offset_split: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: s_and_b32 s0, s0, -4 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-NEXT: s_add_co_u32 s0, s0, 0x100ffe8 +; GFX12-NEXT: scratch_load_b32 v2, off, s0 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_store_b32 v[0:1], v2, off +; GFX12-NEXT: s_nop 0 +; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX12-NEXT: s_endpgm +entry: + ;%allignedBase = alloca [33554432 x i8], align 4, addrspace(5) + %sgpr_base_i32 = ptrtoint ptr addrspace(5) %sgpr_base to i32 + %sgpr_base_i32_align4 = and i32 %sgpr_base_i32, 4294967292 + %sgpr_base_align4 = inttoptr i32 %sgpr_base_i32_align4 to ptr addrspace(5) + %split_offset = getelementptr inbounds [33554432 x i8], ptr addrspace(5) %sgpr_base_align4, i32 0, i32 
16842728 + %load = load volatile i32, ptr addrspace(5) %split_offset, align 4 + store i32 %load, ptr addrspace(1) %out + ret void +} + +define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset(ptr addrspace(5) inreg %sgpr_base, i32 inreg %sidx, i32 %vidx) { +; GFX9-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset: +; GFX9: ; %bb.0: ; %bb +; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5 +; GFX9-NEXT: v_add_u32_e32 v0, s3, v0 +; GFX9-NEXT: v_mov_b32_e32 v1, 0xffe8 +; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 +; GFX9-NEXT: v_add3_u32 v0, s2, v0, v1 +; GFX9-NEXT: v_mov_b32_e32 v1, 15 +; GFX9-NEXT: scratch_store_dword v0, v1, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset: +; GFX10: ; %bb.0: ; %bb +; GFX10-NEXT: s_add_u32 s0, s0, s5 +; GFX10-NEXT: s_addc_u32 s1, s1, 0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 +; GFX10-NEXT: v_add_nc_u32_e32 v0, s3, v0 +; GFX10-NEXT: v_mov_b32_e32 v1, 15 +; GFX10-NEXT: v_add3_u32 v0, s2, v0, 0xffe8 +; GFX10-NEXT: scratch_store_dword v0, v1, off +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_endpgm +; +; GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset: +; GFX940: ; %bb.0: ; %bb +; GFX940-NEXT: v_add_u32_e32 v0, s1, v0 +; GFX940-NEXT: v_mov_b32_e32 v1, 0xffe8 +; GFX940-NEXT: v_add3_u32 v0, s0, v0, v1 +; GFX940-NEXT: v_mov_b32_e32 v1, 15 +; GFX940-NEXT: scratch_store_dword v0, v1, off sc0 sc1 +; GFX940-NEXT: s_waitcnt vmcnt(0) +; GFX940-NEXT: s_endpgm +; +; GFX11-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset: +; GFX11: ; %bb.0: ; %bb +; GFX11-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_add3_u32 v0, s0, v0, 0xffe8 +; GFX11-NEXT: scratch_store_b32 v0, v1, off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset: +; GFX12: ; %bb.0: ; %bb +; GFX12-NEXT: v_dual_mov_b32 v1, 15 :: v_dual_add_nc_u32 v0, s1, v0 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX12-NEXT: v_add_nc_u32_e32 v0, s0, v0 +; GFX12-NEXT: scratch_store_b32 v0, v1, off offset:65512 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: s_endpgm +bb: + %add1 = add nsw i32 %sidx, %vidx + %add2 = add nsw i32 %add1, 65512 + %gep = getelementptr inbounds [33554432 x i8], ptr addrspace(5) %sgpr_base, i32 0, i32 %add2 + store volatile i32 15, ptr addrspace(5) %gep, align 4 + ret void +} + +define amdgpu_gs void @sgpr_base_negative_offset(ptr addrspace(1) %out, ptr addrspace(5) inreg %scevgep) { +; GFX9-LABEL: sgpr_base_negative_offset: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5 +; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 +; GFX9-NEXT: s_add_u32 s0, s2, 0xffffffe8 +; GFX9-NEXT: scratch_load_dword v2, off, s0 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dword v[0:1], v2, off +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: sgpr_base_negative_offset: +; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: s_add_u32 s0, s0, s5 +; GFX10-NEXT: s_addc_u32 s1, s1, 0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 +; GFX10-NEXT: scratch_load_dword v2, off, s2 offset:-24 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_dword v[0:1], v2, off +; GFX10-NEXT: s_endpgm +; +; GFX940-LABEL: 
sgpr_base_negative_offset: +; GFX940: ; %bb.0: ; %entry +; GFX940-NEXT: s_add_u32 s0, s0, 0xffffffe8 +; GFX940-NEXT: scratch_load_dword v2, off, s0 +; GFX940-NEXT: s_waitcnt vmcnt(0) +; GFX940-NEXT: global_store_dword v[0:1], v2, off sc0 sc1 +; GFX940-NEXT: s_endpgm +; +; GFX11-LABEL: sgpr_base_negative_offset: +; GFX11: ; %bb.0: ; %entry +; GFX11-NEXT: scratch_load_b32 v2, off, s0 offset:-24 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b32 v[0:1], v2, off +; GFX11-NEXT: s_nop 0 +; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: sgpr_base_negative_offset: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: scratch_load_b32 v2, off, s0 offset:-24 +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_store_b32 v[0:1], v2, off +; GFX12-NEXT: s_nop 0 +; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX12-NEXT: s_endpgm +entry: + %scevgep28 = getelementptr i8, ptr addrspace(5) %scevgep, i32 -24 + %0 = load i32, ptr addrspace(5) %scevgep28, align 4 + store i32 %0, ptr addrspace(1) %out + ret void +} + declare i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll index 14d8b71c5167..284f17461452 100644 --- a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll +++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll @@ -4921,5 +4921,449 @@ bb: ret void } +define amdgpu_gs void @sgpr_base_large_offset(ptr addrspace(1) %out, ptr addrspace(5) inreg %sgpr_base) { +; GFX9-LABEL: sgpr_base_large_offset: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5 +; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 +; GFX9-NEXT: s_add_i32 s2, s2, 0xffe8 +; GFX9-NEXT: scratch_load_dword v2, off, s2 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dword v[0:1], v2, off +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: sgpr_base_large_offset: +; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: s_add_u32 s0, s0, s5 +; GFX10-NEXT: s_addc_u32 s1, s1, 0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 +; GFX10-NEXT: s_add_i32 s2, s2, 0xffe8 +; GFX10-NEXT: scratch_load_dword v2, off, s2 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_dword v[0:1], v2, off +; GFX10-NEXT: s_endpgm +; +; GFX11-LABEL: sgpr_base_large_offset: +; GFX11: ; %bb.0: ; %entry +; GFX11-NEXT: s_add_i32 s0, s0, 0xffe8 +; GFX11-NEXT: scratch_load_b32 v2, off, s0 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b32 v[0:1], v2, off +; GFX11-NEXT: s_nop 0 +; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: sgpr_base_large_offset: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: scratch_load_b32 v2, off, s0 offset:65512 +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_store_b32 v[0:1], v2, off +; GFX12-NEXT: s_nop 0 +; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX12-NEXT: s_endpgm +; +; GFX9-PAL-LABEL: sgpr_base_large_offset: +; GFX9-PAL: ; %bb.0: ; %entry +; GFX9-PAL-NEXT: s_getpc_b64 s[2:3] +; GFX9-PAL-NEXT: s_mov_b32 s2, s8 +; GFX9-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 +; GFX9-PAL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-PAL-NEXT: s_and_b32 s3, s3, 0xffff +; GFX9-PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s5 +; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 +; GFX9-PAL-NEXT: s_add_i32 s0, s0, 0xffe8 +; GFX9-PAL-NEXT: scratch_load_dword v2, off, s0 +; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) +; GFX9-PAL-NEXT: global_store_dword v[0:1], v2, off +; GFX9-PAL-NEXT: s_endpgm +; +; GFX940-LABEL: 
sgpr_base_large_offset: +; GFX940: ; %bb.0: ; %entry +; GFX940-NEXT: s_add_i32 s0, s0, 0xffe8 +; GFX940-NEXT: scratch_load_dword v2, off, s0 +; GFX940-NEXT: s_waitcnt vmcnt(0) +; GFX940-NEXT: global_store_dword v[0:1], v2, off sc0 sc1 +; GFX940-NEXT: s_endpgm +; +; GFX10-PAL-LABEL: sgpr_base_large_offset: +; GFX10-PAL: ; %bb.0: ; %entry +; GFX10-PAL-NEXT: s_getpc_b64 s[2:3] +; GFX10-PAL-NEXT: s_mov_b32 s2, s8 +; GFX10-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 +; GFX10-PAL-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-PAL-NEXT: s_and_b32 s3, s3, 0xffff +; GFX10-PAL-NEXT: s_add_u32 s2, s2, s5 +; GFX10-PAL-NEXT: s_addc_u32 s3, s3, 0 +; GFX10-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 +; GFX10-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 +; GFX10-PAL-NEXT: s_add_i32 s0, s0, 0xffe8 +; GFX10-PAL-NEXT: scratch_load_dword v2, off, s0 +; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) +; GFX10-PAL-NEXT: global_store_dword v[0:1], v2, off +; GFX10-PAL-NEXT: s_endpgm +; +; GFX11-PAL-LABEL: sgpr_base_large_offset: +; GFX11-PAL: ; %bb.0: ; %entry +; GFX11-PAL-NEXT: s_add_i32 s0, s0, 0xffe8 +; GFX11-PAL-NEXT: scratch_load_b32 v2, off, s0 +; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) +; GFX11-PAL-NEXT: global_store_b32 v[0:1], v2, off +; GFX11-PAL-NEXT: s_nop 0 +; GFX11-PAL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX11-PAL-NEXT: s_endpgm +; +; GFX12-PAL-LABEL: sgpr_base_large_offset: +; GFX12-PAL: ; %bb.0: ; %entry +; GFX12-PAL-NEXT: scratch_load_b32 v2, off, s0 offset:65512 +; GFX12-PAL-NEXT: s_wait_loadcnt 0x0 +; GFX12-PAL-NEXT: global_store_b32 v[0:1], v2, off +; GFX12-PAL-NEXT: s_nop 0 +; GFX12-PAL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX12-PAL-NEXT: s_endpgm +entry: + %large_offset = getelementptr i8, ptr addrspace(5) %sgpr_base, i32 65512 + %load = load i32, ptr addrspace(5) %large_offset, align 4 + store i32 %load, ptr addrspace(1) %out + ret void +} + +define amdgpu_gs void @sgpr_base_large_offset_split(ptr addrspace(1) %out, ptr addrspace(5) inreg %sgpr_base) { +; GFX9-LABEL: sgpr_base_large_offset_split: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5 +; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 +; GFX9-NEXT: s_and_b32 s0, s2, -4 +; GFX9-NEXT: s_add_i32 s0, s0, 0x100f000 +; GFX9-NEXT: scratch_load_dword v2, off, s0 offset:4072 glc +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dword v[0:1], v2, off +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: sgpr_base_large_offset_split: +; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: s_add_u32 s0, s0, s5 +; GFX10-NEXT: s_addc_u32 s1, s1, 0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 +; GFX10-NEXT: s_and_b32 s0, s2, -4 +; GFX10-NEXT: s_add_i32 s0, s0, 0x100f800 +; GFX10-NEXT: scratch_load_dword v2, off, s0 offset:2024 glc dlc +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_dword v[0:1], v2, off +; GFX10-NEXT: s_endpgm +; +; GFX11-LABEL: sgpr_base_large_offset_split: +; GFX11: ; %bb.0: ; %entry +; GFX11-NEXT: v_mov_b32_e32 v2, 0x100f000 +; GFX11-NEXT: s_and_b32 s0, s0, -4 +; GFX11-NEXT: scratch_load_b32 v2, v2, s0 offset:4072 glc dlc +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b32 v[0:1], v2, off +; GFX11-NEXT: s_nop 0 +; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: sgpr_base_large_offset_split: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: v_mov_b32_e32 v2, 0x1000000 +; GFX12-NEXT: s_and_b32 s0, s0, -4 +; GFX12-NEXT: scratch_load_b32 v2, v2, s0 offset:65512 scope:SCOPE_SYS +; 
GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_store_b32 v[0:1], v2, off +; GFX12-NEXT: s_nop 0 +; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX12-NEXT: s_endpgm +; +; GFX9-PAL-LABEL: sgpr_base_large_offset_split: +; GFX9-PAL: ; %bb.0: ; %entry +; GFX9-PAL-NEXT: s_getpc_b64 s[2:3] +; GFX9-PAL-NEXT: s_mov_b32 s2, s8 +; GFX9-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 +; GFX9-PAL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-PAL-NEXT: s_and_b32 s3, s3, 0xffff +; GFX9-PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s5 +; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 +; GFX9-PAL-NEXT: s_and_b32 s0, s0, -4 +; GFX9-PAL-NEXT: s_add_i32 s0, s0, 0x100f000 +; GFX9-PAL-NEXT: scratch_load_dword v2, off, s0 offset:4072 glc +; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) +; GFX9-PAL-NEXT: global_store_dword v[0:1], v2, off +; GFX9-PAL-NEXT: s_endpgm +; +; GFX940-LABEL: sgpr_base_large_offset_split: +; GFX940: ; %bb.0: ; %entry +; GFX940-NEXT: s_and_b32 s0, s0, -4 +; GFX940-NEXT: v_mov_b32_e32 v2, 0x100f000 +; GFX940-NEXT: scratch_load_dword v2, v2, s0 offset:4072 sc0 sc1 +; GFX940-NEXT: s_waitcnt vmcnt(0) +; GFX940-NEXT: global_store_dword v[0:1], v2, off sc0 sc1 +; GFX940-NEXT: s_endpgm +; +; GFX10-PAL-LABEL: sgpr_base_large_offset_split: +; GFX10-PAL: ; %bb.0: ; %entry +; GFX10-PAL-NEXT: s_getpc_b64 s[2:3] +; GFX10-PAL-NEXT: s_mov_b32 s2, s8 +; GFX10-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 +; GFX10-PAL-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-PAL-NEXT: s_and_b32 s3, s3, 0xffff +; GFX10-PAL-NEXT: s_add_u32 s2, s2, s5 +; GFX10-PAL-NEXT: s_addc_u32 s3, s3, 0 +; GFX10-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 +; GFX10-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 +; GFX10-PAL-NEXT: s_and_b32 s0, s0, -4 +; GFX10-PAL-NEXT: s_add_i32 s0, s0, 0x100f800 +; GFX10-PAL-NEXT: scratch_load_dword v2, off, s0 offset:2024 glc dlc +; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) +; GFX10-PAL-NEXT: global_store_dword v[0:1], v2, off +; GFX10-PAL-NEXT: s_endpgm +; +; GFX11-PAL-LABEL: sgpr_base_large_offset_split: +; GFX11-PAL: ; %bb.0: ; %entry +; GFX11-PAL-NEXT: v_mov_b32_e32 v2, 0x100f000 +; GFX11-PAL-NEXT: s_and_b32 s0, s0, -4 +; GFX11-PAL-NEXT: scratch_load_b32 v2, v2, s0 offset:4072 glc dlc +; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) +; GFX11-PAL-NEXT: global_store_b32 v[0:1], v2, off +; GFX11-PAL-NEXT: s_nop 0 +; GFX11-PAL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX11-PAL-NEXT: s_endpgm +; +; GFX12-PAL-LABEL: sgpr_base_large_offset_split: +; GFX12-PAL: ; %bb.0: ; %entry +; GFX12-PAL-NEXT: v_mov_b32_e32 v2, 0x1000000 +; GFX12-PAL-NEXT: s_and_b32 s0, s0, -4 +; GFX12-PAL-NEXT: scratch_load_b32 v2, v2, s0 offset:65512 scope:SCOPE_SYS +; GFX12-PAL-NEXT: s_wait_loadcnt 0x0 +; GFX12-PAL-NEXT: global_store_b32 v[0:1], v2, off +; GFX12-PAL-NEXT: s_nop 0 +; GFX12-PAL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX12-PAL-NEXT: s_endpgm +entry: + ;%allignedBase = alloca [33554432 x i8], align 4, addrspace(5) + %sgpr_base_i32 = ptrtoint ptr addrspace(5) %sgpr_base to i32 + %sgpr_base_i32_align4 = and i32 %sgpr_base_i32, 4294967292 + %sgpr_base_align4 = inttoptr i32 %sgpr_base_i32_align4 to ptr addrspace(5) + %split_offset = getelementptr inbounds [33554432 x i8], ptr addrspace(5) %sgpr_base_align4, i32 0, i32 16842728 + %load = load volatile i32, ptr addrspace(5) %split_offset, align 4 + store i32 %load, ptr addrspace(1) %out + ret void +} + +define amdgpu_gs void @sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset(ptr addrspace(5) inreg %sgpr_base, i32 inreg %sidx, i32 %vidx) { +; GFX9-LABEL: 
sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset: +; GFX9: ; %bb.0: ; %bb +; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5 +; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 +; GFX9-NEXT: s_add_i32 s2, s2, s3 +; GFX9-NEXT: v_add_u32_e32 v0, s2, v0 +; GFX9-NEXT: v_add_u32_e32 v0, 0xffe8, v0 +; GFX9-NEXT: v_mov_b32_e32 v1, 15 +; GFX9-NEXT: scratch_store_dword v0, v1, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset: +; GFX10: ; %bb.0: ; %bb +; GFX10-NEXT: s_add_u32 s0, s0, s5 +; GFX10-NEXT: s_addc_u32 s1, s1, 0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 +; GFX10-NEXT: s_add_i32 s2, s2, s3 +; GFX10-NEXT: v_mov_b32_e32 v1, 15 +; GFX10-NEXT: v_add3_u32 v0, s2, v0, 0xffe8 +; GFX10-NEXT: scratch_store_dword v0, v1, off +; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_endpgm +; +; GFX11-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset: +; GFX11: ; %bb.0: ; %bb +; GFX11-NEXT: s_add_i32 s0, s0, s1 +; GFX11-NEXT: v_mov_b32_e32 v1, 15 +; GFX11-NEXT: v_add3_u32 v0, s0, v0, 0xffe8 +; GFX11-NEXT: scratch_store_b32 v0, v1, off dlc +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset: +; GFX12: ; %bb.0: ; %bb +; GFX12-NEXT: v_mov_b32_e32 v1, 15 +; GFX12-NEXT: s_add_co_i32 s0, s0, s1 +; GFX12-NEXT: scratch_store_b32 v0, v1, s0 offset:65512 scope:SCOPE_SYS +; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: s_endpgm +; +; GFX9-PAL-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset: +; GFX9-PAL: ; %bb.0: ; %bb +; GFX9-PAL-NEXT: s_getpc_b64 s[2:3] +; GFX9-PAL-NEXT: s_mov_b32 s2, s8 +; GFX9-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 +; GFX9-PAL-NEXT: v_mov_b32_e32 v1, 15 +; GFX9-PAL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-PAL-NEXT: s_and_b32 s3, s3, 0xffff +; GFX9-PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s5 +; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 +; GFX9-PAL-NEXT: s_add_i32 s0, s0, s1 +; GFX9-PAL-NEXT: v_add_u32_e32 v0, s0, v0 +; GFX9-PAL-NEXT: v_add_u32_e32 v0, 0xffe8, v0 +; GFX9-PAL-NEXT: scratch_store_dword v0, v1, off +; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) +; GFX9-PAL-NEXT: s_endpgm +; +; GFX940-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset: +; GFX940: ; %bb.0: ; %bb +; GFX940-NEXT: s_add_i32 s0, s0, s1 +; GFX940-NEXT: v_add_u32_e32 v0, s0, v0 +; GFX940-NEXT: v_add_u32_e32 v0, 0xffe8, v0 +; GFX940-NEXT: v_mov_b32_e32 v1, 15 +; GFX940-NEXT: scratch_store_dword v0, v1, off sc0 sc1 +; GFX940-NEXT: s_waitcnt vmcnt(0) +; GFX940-NEXT: s_endpgm +; +; GFX10-PAL-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset: +; GFX10-PAL: ; %bb.0: ; %bb +; GFX10-PAL-NEXT: s_getpc_b64 s[2:3] +; GFX10-PAL-NEXT: s_mov_b32 s2, s8 +; GFX10-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 +; GFX10-PAL-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-PAL-NEXT: s_and_b32 s3, s3, 0xffff +; GFX10-PAL-NEXT: s_add_u32 s2, s2, s5 +; GFX10-PAL-NEXT: s_addc_u32 s3, s3, 0 +; GFX10-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 +; GFX10-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 +; GFX10-PAL-NEXT: s_add_i32 s0, s0, s1 +; GFX10-PAL-NEXT: v_mov_b32_e32 v1, 15 +; GFX10-PAL-NEXT: v_add3_u32 v0, s0, v0, 0xffe8 +; GFX10-PAL-NEXT: scratch_store_dword v0, v1, off +; GFX10-PAL-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-PAL-NEXT: s_endpgm +; +; GFX11-PAL-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset: +; GFX11-PAL: ; %bb.0: ; %bb +; GFX11-PAL-NEXT: s_add_i32 
s0, s0, s1 +; GFX11-PAL-NEXT: v_mov_b32_e32 v1, 15 +; GFX11-PAL-NEXT: v_add3_u32 v0, s0, v0, 0xffe8 +; GFX11-PAL-NEXT: scratch_store_b32 v0, v1, off dlc +; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-PAL-NEXT: s_endpgm +; +; GFX12-PAL-LABEL: sgpr_base_plus_sgpr_plus_vgpr_plus_large_imm_offset: +; GFX12-PAL: ; %bb.0: ; %bb +; GFX12-PAL-NEXT: v_mov_b32_e32 v1, 15 +; GFX12-PAL-NEXT: s_add_co_i32 s0, s0, s1 +; GFX12-PAL-NEXT: scratch_store_b32 v0, v1, s0 offset:65512 scope:SCOPE_SYS +; GFX12-PAL-NEXT: s_wait_storecnt 0x0 +; GFX12-PAL-NEXT: s_endpgm +bb: + %add1 = add nsw i32 %sidx, %vidx + %add2 = add nsw i32 %add1, 65512 + %gep = getelementptr inbounds [33554432 x i8], ptr addrspace(5) %sgpr_base, i32 0, i32 %add2 + store volatile i32 15, ptr addrspace(5) %gep, align 4 + ret void +} + +define amdgpu_gs void @sgpr_base_negative_offset(ptr addrspace(1) %out, ptr addrspace(5) inreg %scevgep) { +; GFX9-LABEL: sgpr_base_negative_offset: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_add_u32 flat_scratch_lo, s0, s5 +; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s1, 0 +; GFX9-NEXT: s_addk_i32 s2, 0xffe8 +; GFX9-NEXT: scratch_load_dword v2, off, s2 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: global_store_dword v[0:1], v2, off +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: sgpr_base_negative_offset: +; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: s_add_u32 s0, s0, s5 +; GFX10-NEXT: s_addc_u32 s1, s1, 0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0 +; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s1 +; GFX10-NEXT: scratch_load_dword v2, off, s2 offset:-24 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: global_store_dword v[0:1], v2, off +; GFX10-NEXT: s_endpgm +; +; GFX11-LABEL: sgpr_base_negative_offset: +; GFX11: ; %bb.0: ; %entry +; GFX11-NEXT: scratch_load_b32 v2, off, s0 offset:-24 +; GFX11-NEXT: s_waitcnt vmcnt(0) +; GFX11-NEXT: global_store_b32 v[0:1], v2, off +; GFX11-NEXT: s_nop 0 +; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX11-NEXT: s_endpgm +; +; GFX12-LABEL: sgpr_base_negative_offset: +; GFX12: ; %bb.0: ; %entry +; GFX12-NEXT: scratch_load_b32 v2, off, s0 offset:-24 +; GFX12-NEXT: s_wait_loadcnt 0x0 +; GFX12-NEXT: global_store_b32 v[0:1], v2, off +; GFX12-NEXT: s_nop 0 +; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX12-NEXT: s_endpgm +; +; GFX9-PAL-LABEL: sgpr_base_negative_offset: +; GFX9-PAL: ; %bb.0: ; %entry +; GFX9-PAL-NEXT: s_getpc_b64 s[2:3] +; GFX9-PAL-NEXT: s_mov_b32 s2, s8 +; GFX9-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 +; GFX9-PAL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-PAL-NEXT: s_and_b32 s3, s3, 0xffff +; GFX9-PAL-NEXT: s_add_u32 flat_scratch_lo, s2, s5 +; GFX9-PAL-NEXT: s_addc_u32 flat_scratch_hi, s3, 0 +; GFX9-PAL-NEXT: s_addk_i32 s0, 0xffe8 +; GFX9-PAL-NEXT: scratch_load_dword v2, off, s0 +; GFX9-PAL-NEXT: s_waitcnt vmcnt(0) +; GFX9-PAL-NEXT: global_store_dword v[0:1], v2, off +; GFX9-PAL-NEXT: s_endpgm +; +; GFX940-LABEL: sgpr_base_negative_offset: +; GFX940: ; %bb.0: ; %entry +; GFX940-NEXT: s_addk_i32 s0, 0xffe8 +; GFX940-NEXT: scratch_load_dword v2, off, s0 +; GFX940-NEXT: s_waitcnt vmcnt(0) +; GFX940-NEXT: global_store_dword v[0:1], v2, off sc0 sc1 +; GFX940-NEXT: s_endpgm +; +; GFX10-PAL-LABEL: sgpr_base_negative_offset: +; GFX10-PAL: ; %bb.0: ; %entry +; GFX10-PAL-NEXT: s_getpc_b64 s[2:3] +; GFX10-PAL-NEXT: s_mov_b32 s2, s8 +; GFX10-PAL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 +; GFX10-PAL-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-PAL-NEXT: s_and_b32 s3, s3, 0xffff +; GFX10-PAL-NEXT: s_add_u32 s2, s2, s5 +; GFX10-PAL-NEXT: s_addc_u32 s3, s3, 
0 +; GFX10-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s2 +; GFX10-PAL-NEXT: s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s3 +; GFX10-PAL-NEXT: scratch_load_dword v2, off, s0 offset:-24 +; GFX10-PAL-NEXT: s_waitcnt vmcnt(0) +; GFX10-PAL-NEXT: global_store_dword v[0:1], v2, off +; GFX10-PAL-NEXT: s_endpgm +; +; GFX11-PAL-LABEL: sgpr_base_negative_offset: +; GFX11-PAL: ; %bb.0: ; %entry +; GFX11-PAL-NEXT: scratch_load_b32 v2, off, s0 offset:-24 +; GFX11-PAL-NEXT: s_waitcnt vmcnt(0) +; GFX11-PAL-NEXT: global_store_b32 v[0:1], v2, off +; GFX11-PAL-NEXT: s_nop 0 +; GFX11-PAL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX11-PAL-NEXT: s_endpgm +; +; GFX12-PAL-LABEL: sgpr_base_negative_offset: +; GFX12-PAL: ; %bb.0: ; %entry +; GFX12-PAL-NEXT: scratch_load_b32 v2, off, s0 offset:-24 +; GFX12-PAL-NEXT: s_wait_loadcnt 0x0 +; GFX12-PAL-NEXT: global_store_b32 v[0:1], v2, off +; GFX12-PAL-NEXT: s_nop 0 +; GFX12-PAL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) +; GFX12-PAL-NEXT: s_endpgm +entry: + %scevgep28 = getelementptr i8, ptr addrspace(5) %scevgep, i32 -24 + %0 = load i32, ptr addrspace(5) %scevgep28, align 4 + store i32 %0, ptr addrspace(1) %out + ret void +} + declare void @llvm.memset.p5.i64(ptr addrspace(5) nocapture writeonly, i8, i64, i1 immarg) declare i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AVR/ldd-immediate-overflow.ll b/llvm/test/CodeGen/AVR/ldd-immediate-overflow.ll new file mode 100644 index 000000000000..6f1a4b32bb05 --- /dev/null +++ b/llvm/test/CodeGen/AVR/ldd-immediate-overflow.ll @@ -0,0 +1,144 @@ +; RUN: llc -march=avr -filetype=asm -O1 < %s | FileCheck %s + +define void @check60(ptr %1) { +; CHECK-LABEL: check60: +; CHECK-NEXT: %bb.0 +; CHECK-NEXT: mov r30, r24 +; CHECK-NEXT: mov r31, r25 +; CHECK-NEXT: ldd r24, Z+60 +; CHECK-NEXT: ldd r25, Z+61 +; CHECK-NEXT: ldd r18, Z+62 +; CHECK-NEXT: ldd r19, Z+63 +; CHECK-NEXT: sts 3, r19 +; CHECK-NEXT: sts 2, r18 +; CHECK-NEXT: sts 1, r25 +; CHECK-NEXT: sts 0, r24 +; CHECK-NEXT: ret + +bb0: + %2 = getelementptr i8, ptr %1, i16 60 + %3 = load i32, ptr %2, align 1 + store i32 %3, ptr null, align 1 + ret void +} + +define void @check61(ptr %1) { +; CHECK-LABEL: check61: +; CHECK-NEXT: %bb.0 +; CHECK-NEXT: mov r30, r24 +; CHECK-NEXT: mov r31, r25 +; CHECK-NEXT: ldd r18, Z+61 +; CHECK-NEXT: ldd r19, Z+62 +; CHECK-NEXT: adiw r24, 63 +; CHECK-NEXT: mov r30, r24 +; CHECK-NEXT: mov r31, r25 +; CHECK-NEXT: ld r24, Z +; CHECK-NEXT: ldd r25, Z+1 +; CHECK-NEXT: sts 3, r25 +; CHECK-NEXT: sts 2, r24 +; CHECK-NEXT: sts 1, r19 +; CHECK-NEXT: sts 0, r18 +; CHECK-NEXT: ret + +bb0: + %2 = getelementptr i8, ptr %1, i16 61 + %3 = load i32, ptr %2, align 1 + store i32 %3, ptr null, align 1 + ret void +} + +define void @check62(ptr %1) { +; CHECK-LABEL: check62: +; CHECK-NEXT: %bb.0 +; CHECK-NEXT: mov r30, r24 +; CHECK-NEXT: mov r31, r25 +; CHECK-NEXT: ldd r18, Z+62 +; CHECK-NEXT: ldd r19, Z+63 +; CHECK-NEXT: adiw r24, 62 +; CHECK-NEXT: mov r30, r24 +; CHECK-NEXT: mov r31, r25 +; CHECK-NEXT: ldd r24, Z+2 +; CHECK-NEXT: ldd r25, Z+3 +; CHECK-NEXT: sts 3, r25 +; CHECK-NEXT: sts 2, r24 +; CHECK-NEXT: sts 1, r19 +; CHECK-NEXT: sts 0, r18 +; CHECK-NEXT: ret + +bb0: + %2 = getelementptr i8, ptr %1, i16 62 + %3 = load i32, ptr %2, align 1 + store i32 %3, ptr null, align 1 + ret void +} + +define void @check63(ptr %1) { +; CHECK-LABEL: check63: +; CHECK-NEXT: %bb.0 +; CHECK-NEXT: adiw r24, 63 +; CHECK-NEXT: mov r30, r24 +; CHECK-NEXT: mov r31, r25 +; CHECK-NEXT: ld r24, Z +; CHECK-NEXT: ldd r25, Z+1 +; CHECK-NEXT: ldd r18, Z+2 +; CHECK-NEXT: 
ldd r19, Z+3 +; CHECK-NEXT: sts 3, r19 +; CHECK-NEXT: sts 2, r18 +; CHECK-NEXT: sts 1, r25 +; CHECK-NEXT: sts 0, r24 +; CHECK-NEXT: ret + +bb0: + %2 = getelementptr i8, ptr %1, i16 63 + %3 = load i32, ptr %2, align 1 + store i32 %3, ptr null, align 1 + ret void +} + +define void @check64(ptr %1) { +; CHECK-LABEL: check64: +; CHECK-NEXT: %bb.0 +; CHECK-NEXT: subi r24, 192 +; CHECK-NEXT: sbci r25, 255 +; CHECK-NEXT: mov r30, r24 +; CHECK-NEXT: mov r31, r25 +; CHECK-NEXT: ld r24, Z +; CHECK-NEXT: ldd r25, Z+1 +; CHECK-NEXT: ldd r18, Z+2 +; CHECK-NEXT: ldd r19, Z+3 +; CHECK-NEXT: sts 3, r19 +; CHECK-NEXT: sts 2, r18 +; CHECK-NEXT: sts 1, r25 +; CHECK-NEXT: sts 0, r24 +; CHECK-NEXT: ret + +bb0: + %2 = getelementptr i8, ptr %1, i16 64 + %3 = load i32, ptr %2, align 1 + store i32 %3, ptr null, align 1 + ret void +} + +define void @check65(ptr %1) { +; CHECK-LABEL: check65: +; CHECK-NEXT: %bb.0 +; CHECK-NEXT: subi r24, 191 +; CHECK-NEXT: sbci r25, 255 +; CHECK-NEXT: mov r30, r24 +; CHECK-NEXT: mov r31, r25 +; CHECK-NEXT: ld r24, Z +; CHECK-NEXT: ldd r25, Z+1 +; CHECK-NEXT: ldd r18, Z+2 +; CHECK-NEXT: ldd r19, Z+3 +; CHECK-NEXT: sts 3, r19 +; CHECK-NEXT: sts 2, r18 +; CHECK-NEXT: sts 1, r25 +; CHECK-NEXT: sts 0, r24 +; CHECK-NEXT: ret + +bb0: + %2 = getelementptr i8, ptr %1, i16 65 + %3 = load i32, ptr %2, align 1 + store i32 %3, ptr null, align 1 + ret void +} diff --git a/llvm/test/CodeGen/AVR/std-immediate-overflow.ll b/llvm/test/CodeGen/AVR/std-immediate-overflow.ll new file mode 100644 index 000000000000..18ccb79d3a5f --- /dev/null +++ b/llvm/test/CodeGen/AVR/std-immediate-overflow.ll @@ -0,0 +1,137 @@ +; RUN: llc -march=avr -filetype=asm -O1 < %s | FileCheck %s + +define void @check60(ptr %1) { +; CHECK-LABEL: check60: +; CHECK-NEXT: %bb.0 +; CHECK-NEXT: ldi r18, 0 +; CHECK-NEXT: ldi r19, 0 +; CHECK-NEXT: mov r30, r24 +; CHECK-NEXT: mov r31, r25 +; CHECK-NEXT: std Z+63, r19 +; CHECK-NEXT: std Z+62, r18 +; CHECK-NEXT: ldi r24, 210 +; CHECK-NEXT: ldi r25, 4 +; CHECK-NEXT: std Z+61, r25 +; CHECK-NEXT: std Z+60, r24 +; CHECK-NEXT: ret + +bb0: + %2 = getelementptr i8, ptr %1, i8 60 + store i32 1234, ptr %2 + ret void +} + +define void @check61(ptr %1) { +; CHECK-LABEL: check61: +; CHECK-NEXT: %bb.0 +; CHECK-NEXT: ldi r18, 210 +; CHECK-NEXT: ldi r19, 4 +; CHECK-NEXT: mov r30, r24 +; CHECK-NEXT: mov r31, r25 +; CHECK-NEXT: std Z+62, r19 +; CHECK-NEXT: std Z+61, r18 +; CHECK-NEXT: adiw r24, 63 +; CHECK-NEXT: ldi r18, 0 +; CHECK-NEXT: ldi r19, 0 +; CHECK-NEXT: mov r30, r24 +; CHECK-NEXT: mov r31, r25 +; CHECK-NEXT: std Z+1, r19 +; CHECK-NEXT: st Z, r18 + +bb0: + %2 = getelementptr i8, ptr %1, i8 61 + store i32 1234, ptr %2 + ret void +} + +define void @check62(ptr %1) { +; CHECK-LABEL: check62: +; CHECK-NEXT: %bb.0 +; CHECK-NEXT: ldi r18, 210 +; CHECK-NEXT: ldi r19, 4 +; CHECK-NEXT: mov r30, r24 +; CHECK-NEXT: mov r31, r25 +; CHECK-NEXT: std Z+63, r19 +; CHECK-NEXT: std Z+62, r18 +; CHECK-NEXT: adiw r24, 62 +; CHECK-NEXT: ldi r18, 0 +; CHECK-NEXT: ldi r19, 0 +; CHECK-NEXT: mov r30, r24 +; CHECK-NEXT: mov r31, r25 +; CHECK-NEXT: std Z+3, r19 +; CHECK-NEXT: std Z+2, r18 +; CHECK-NEXT: ret + +bb0: + %2 = getelementptr i8, ptr %1, i8 62 + store i32 1234, ptr %2 + ret void +} + +define void @check63(ptr %1) { +; CHECK-LABEL: check63: +; CHECK-NEXT: %bb.0 +; CHECK-NEXT: adiw r24, 63 +; CHECK-NEXT: ldi r18, 0 +; CHECK-NEXT: ldi r19, 0 +; CHECK-NEXT: mov r30, r24 +; CHECK-NEXT: mov r31, r25 +; CHECK-NEXT: std Z+3, r19 +; CHECK-NEXT: std Z+2, r18 +; CHECK-NEXT: ldi r24, 210 +; CHECK-NEXT: ldi r25, 4 +; 
CHECK-NEXT: std Z+1, r25 +; CHECK-NEXT: st Z, r24 +; CHECK-NEXT: ret + +bb0: + %2 = getelementptr i8, ptr %1, i8 63 + store i32 1234, ptr %2 + ret void +} + +define void @check64(ptr %1) { +; CHECK-LABEL: check64: +; CHECK-NEXT: %bb.0 +; CHECK-NEXT: subi r24, 192 +; CHECK-NEXT: sbci r25, 255 +; CHECK-NEXT: ldi r18, 0 +; CHECK-NEXT: ldi r19, 0 +; CHECK-NEXT: mov r30, r24 +; CHECK-NEXT: mov r31, r25 +; CHECK-NEXT: std Z+3, r19 +; CHECK-NEXT: std Z+2, r18 +; CHECK-NEXT: ldi r24, 210 +; CHECK-NEXT: ldi r25, 4 +; CHECK-NEXT: std Z+1, r25 +; CHECK-NEXT: st Z, r24 +; CHECK-NEXT: ret + +bb0: + %2 = getelementptr i8, ptr %1, i8 64 + store i32 1234, ptr %2 + ret void +} + +define void @check65(ptr %1) { +; CHECK-LABEL: check65: +; CHECK-NEXT: %bb.0 +; CHECK-NEXT: subi r24, 191 +; CHECK-NEXT: sbci r25, 255 +; CHECK-NEXT: ldi r18, 0 +; CHECK-NEXT: ldi r19, 0 +; CHECK-NEXT: mov r30, r24 +; CHECK-NEXT: mov r31, r25 +; CHECK-NEXT: std Z+3, r19 +; CHECK-NEXT: std Z+2, r18 +; CHECK-NEXT: ldi r24, 210 +; CHECK-NEXT: ldi r25, 4 +; CHECK-NEXT: std Z+1, r25 +; CHECK-NEXT: st Z, r24 +; CHECK-NEXT: ret + +bb0: + %2 = getelementptr i8, ptr %1, i8 65 + store i32 1234, ptr %2 + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll index c51fded410e8..1af2b38d7994 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/load-store-atomic.ll @@ -72,6 +72,22 @@ define i64 @load_acquire_i64(ptr %ptr) { ret i64 %val } +define ptr @load_acquire_ptr(ptr %ptr) { +; LA32-LABEL: load_acquire_ptr: +; LA32: # %bb.0: +; LA32-NEXT: ld.w $a0, $a0, 0 +; LA32-NEXT: dbar 20 +; LA32-NEXT: ret +; +; LA64-LABEL: load_acquire_ptr: +; LA64: # %bb.0: +; LA64-NEXT: ld.d $a0, $a0, 0 +; LA64-NEXT: dbar 20 +; LA64-NEXT: ret + %val = load atomic ptr, ptr %ptr acquire, align 8 + ret ptr %val +} + define i8 @load_unordered_i8(ptr %ptr) { ; LA32-LABEL: load_unordered_i8: ; LA32: # %bb.0: @@ -135,6 +151,20 @@ define i64 @load_unordered_i64(ptr %ptr) { ret i64 %val } +define ptr @load_unordered_ptr(ptr %ptr) { +; LA32-LABEL: load_unordered_ptr: +; LA32: # %bb.0: +; LA32-NEXT: ld.w $a0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: load_unordered_ptr: +; LA64: # %bb.0: +; LA64-NEXT: ld.d $a0, $a0, 0 +; LA64-NEXT: ret + %val = load atomic ptr, ptr %ptr unordered, align 8 + ret ptr %val +} + define i8 @load_monotonic_i8(ptr %ptr) { ; LA32-LABEL: load_monotonic_i8: ; LA32: # %bb.0: @@ -198,6 +228,20 @@ define i64 @load_monotonic_i64(ptr %ptr) { ret i64 %val } +define ptr @load_monotonic_ptr(ptr %ptr) { +; LA32-LABEL: load_monotonic_ptr: +; LA32: # %bb.0: +; LA32-NEXT: ld.w $a0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: load_monotonic_ptr: +; LA64: # %bb.0: +; LA64-NEXT: ld.d $a0, $a0, 0 +; LA64-NEXT: ret + %val = load atomic ptr, ptr %ptr monotonic, align 8 + ret ptr %val +} + define i8 @load_seq_cst_i8(ptr %ptr) { ; LA32-LABEL: load_seq_cst_i8: ; LA32: # %bb.0: @@ -268,6 +312,22 @@ define i64 @load_seq_cst_i64(ptr %ptr) { ret i64 %val } +define ptr @load_seq_cst_ptr(ptr %ptr) { +; LA32-LABEL: load_seq_cst_ptr: +; LA32: # %bb.0: +; LA32-NEXT: ld.w $a0, $a0, 0 +; LA32-NEXT: dbar 16 +; LA32-NEXT: ret +; +; LA64-LABEL: load_seq_cst_ptr: +; LA64: # %bb.0: +; LA64-NEXT: ld.d $a0, $a0, 0 +; LA64-NEXT: dbar 16 +; LA64-NEXT: ret + %val = load atomic ptr, ptr %ptr seq_cst, align 8 + ret ptr %val +} + define void @store_release_i8(ptr %ptr, i8 signext %v) { ; LA32-LABEL: store_release_i8: ; LA32: 
# %bb.0: @@ -336,6 +396,21 @@ define void @store_release_i64(ptr %ptr, i64 %v) { ret void } +define void @store_release_ptr(ptr %ptr, ptr %v) { +; LA32-LABEL: store_release_ptr: +; LA32: # %bb.0: +; LA32-NEXT: dbar 18 +; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: store_release_ptr: +; LA64: # %bb.0: +; LA64-NEXT: amswap_db.d $zero, $a1, $a0 +; LA64-NEXT: ret + store atomic ptr %v, ptr %ptr release, align 8 + ret void +} + define void @store_unordered_i8(ptr %ptr, i8 signext %v) { ; LA32-LABEL: store_unordered_i8: ; LA32: # %bb.0: @@ -399,6 +474,20 @@ define void @store_unordered_i64(ptr %ptr, i64 %v) { ret void } +define void @store_unordered_ptr(ptr %ptr, ptr %v) { +; LA32-LABEL: store_unordered_ptr: +; LA32: # %bb.0: +; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: store_unordered_ptr: +; LA64: # %bb.0: +; LA64-NEXT: st.d $a1, $a0, 0 +; LA64-NEXT: ret + store atomic ptr %v, ptr %ptr unordered, align 8 + ret void +} + define void @store_monotonic_i8(ptr %ptr, i8 signext %v) { ; LA32-LABEL: store_monotonic_i8: ; LA32: # %bb.0: @@ -462,6 +551,20 @@ define void @store_monotonic_i64(ptr %ptr, i64 %v) { ret void } +define void @store_monotonic_ptr(ptr %ptr, ptr %v) { +; LA32-LABEL: store_monotonic_ptr: +; LA32: # %bb.0: +; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: store_monotonic_ptr: +; LA64: # %bb.0: +; LA64-NEXT: st.d $a1, $a0, 0 +; LA64-NEXT: ret + store atomic ptr %v, ptr %ptr monotonic, align 8 + ret void +} + define void @store_seq_cst_i8(ptr %ptr, i8 signext %v) { ; LA32-LABEL: store_seq_cst_i8: ; LA32: # %bb.0: @@ -534,3 +637,19 @@ define void @store_seq_cst_i64(ptr %ptr, i64 %v) { store atomic i64 %v, ptr %ptr seq_cst, align 8 ret void } + +define void @store_seq_cst_ptr(ptr %ptr, ptr %v) { +; LA32-LABEL: store_seq_cst_ptr: +; LA32: # %bb.0: +; LA32-NEXT: dbar 16 +; LA32-NEXT: st.w $a1, $a0, 0 +; LA32-NEXT: dbar 16 +; LA32-NEXT: ret +; +; LA64-LABEL: store_seq_cst_ptr: +; LA64: # %bb.0: +; LA64-NEXT: amswap_db.d $zero, $a1, $a0 +; LA64-NEXT: ret + store atomic ptr %v, ptr %ptr seq_cst, align 8 + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll index ab3eec240db3..c5af79157eaa 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/sdiv-udiv-srem-urem.ll @@ -1151,3 +1151,64 @@ entry: %r = urem i64 %a, %b ret i64 %r } + +define signext i32 @pr107414(i32 signext %x) { +; LA32-LABEL: pr107414: +; LA32: # %bb.0: # %entry +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: move $a2, $a0 +; LA32-NEXT: srai.w $a3, $a0, 31 +; LA32-NEXT: lu12i.w $a0, -266831 +; LA32-NEXT: ori $a0, $a0, 3337 +; LA32-NEXT: move $a1, $zero +; LA32-NEXT: bl %plt(__divdi3) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: pr107414: +; LA64: # %bb.0: # %entry +; LA64-NEXT: lu12i.w $a1, -266831 +; LA64-NEXT: ori $a1, $a1, 3337 +; LA64-NEXT: lu32i.d $a1, 0 +; LA64-NEXT: div.d $a0, $a1, $a0 +; LA64-NEXT: addi.w $a0, $a0, 0 +; LA64-NEXT: ret +; +; LA32-TRAP-LABEL: pr107414: +; LA32-TRAP: # %bb.0: # %entry +; LA32-TRAP-NEXT: addi.w $sp, $sp, -16 +; LA32-TRAP-NEXT: .cfi_def_cfa_offset 16 +; LA32-TRAP-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-TRAP-NEXT: .cfi_offset 1, -4 +; 
LA32-TRAP-NEXT: move $a2, $a0 +; LA32-TRAP-NEXT: srai.w $a3, $a0, 31 +; LA32-TRAP-NEXT: lu12i.w $a0, -266831 +; LA32-TRAP-NEXT: ori $a0, $a0, 3337 +; LA32-TRAP-NEXT: move $a1, $zero +; LA32-TRAP-NEXT: bl %plt(__divdi3) +; LA32-TRAP-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-TRAP-NEXT: addi.w $sp, $sp, 16 +; LA32-TRAP-NEXT: ret +; +; LA64-TRAP-LABEL: pr107414: +; LA64-TRAP: # %bb.0: # %entry +; LA64-TRAP-NEXT: lu12i.w $a1, -266831 +; LA64-TRAP-NEXT: ori $a1, $a1, 3337 +; LA64-TRAP-NEXT: lu32i.d $a1, 0 +; LA64-TRAP-NEXT: div.d $a1, $a1, $a0 +; LA64-TRAP-NEXT: bnez $a0, .LBB32_2 +; LA64-TRAP-NEXT: # %bb.1: # %entry +; LA64-TRAP-NEXT: break 7 +; LA64-TRAP-NEXT: .LBB32_2: # %entry +; LA64-TRAP-NEXT: addi.w $a0, $a1, 0 +; LA64-TRAP-NEXT: ret +entry: + %conv = sext i32 %x to i64 + %div = sdiv i64 3202030857, %conv + %conv1 = trunc i64 %div to i32 + ret i32 %conv1 +} diff --git a/llvm/test/CodeGen/LoongArch/lasx/issue107355.ll b/llvm/test/CodeGen/LoongArch/lasx/issue107355.ll new file mode 100644 index 000000000000..818bd4311615 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lasx/issue107355.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s + +;; Without this patch (codegen for concat_vectors), the test will hang. +@g_156 = external global [12 x i32] +@g_490 = external global i32 +@g_813 = external global i32 + +define void @foo() { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pcalau12i $a0, %got_pc_hi20(g_156) +; CHECK-NEXT: ld.d $a0, $a0, %got_pc_lo12(g_156) +; CHECK-NEXT: pcalau12i $a1, %got_pc_hi20(g_490) +; CHECK-NEXT: ld.d $a1, $a1, %got_pc_lo12(g_490) +; CHECK-NEXT: ld.w $a2, $a0, 24 +; CHECK-NEXT: pcalau12i $a3, %got_pc_hi20(g_813) +; CHECK-NEXT: ld.d $a3, $a3, %got_pc_lo12(g_813) +; CHECK-NEXT: st.w $zero, $a1, 0 +; CHECK-NEXT: st.w $a2, $a3, 0 +; CHECK-NEXT: vrepli.b $vr0, 0 +; CHECK-NEXT: vst $vr0, $a0, 32 +; CHECK-NEXT: xvpermi.q $xr0, $xr0, 2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: st.w $zero, $a0, 20 +; CHECK-NEXT: ret +entry: + store i32 0, ptr getelementptr inbounds (i8, ptr @g_156, i64 20), align 4 + store i32 0, ptr @g_490, align 4 + %0 = load i32, ptr getelementptr inbounds (i8, ptr @g_156, i64 24), align 4 + store i32 %0, ptr @g_813, align 4 + tail call void @llvm.memset.p0.i64(ptr @g_156, i8 0, i64 48, i1 false) + store i32 0, ptr getelementptr inbounds (i8, ptr @g_156, i64 20), align 4 + ret void +} diff --git a/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll b/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll index 6144a9d92036..e531516c37e8 100644 --- a/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll +++ b/llvm/test/CodeGen/PowerPC/ldexp-libcall.ll @@ -10,7 +10,7 @@ define float @call_ldexpf(float %a, i32 %b) { ; CHECK-NEXT: std r0, 48(r1) ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: clrldi r4, r4, 32 +; CHECK-NEXT: extsw r4, r4 ; CHECK-NEXT: bl ldexpf ; CHECK-NEXT: nop ; CHECK-NEXT: addi r1, r1, 32 @@ -29,7 +29,7 @@ define double @call_ldexp(double %a, i32 %b) { ; CHECK-NEXT: std r0, 48(r1) ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: clrldi r4, r4, 32 +; CHECK-NEXT: extsw r4, r4 ; CHECK-NEXT: bl ldexp ; CHECK-NEXT: nop ; CHECK-NEXT: addi r1, r1, 32 diff --git a/llvm/test/CodeGen/PowerPC/ldexp.ll b/llvm/test/CodeGen/PowerPC/ldexp.ll index 151df6096b30..ffc826cc86de 100644 --- a/llvm/test/CodeGen/PowerPC/ldexp.ll +++ 
b/llvm/test/CodeGen/PowerPC/ldexp.ll @@ -57,22 +57,24 @@ define <2 x float> @ldexp_v2f32(<2 x float> %val, <2 x i32> %exp) { ; CHECK-NEXT: .cfi_offset v29, -48 ; CHECK-NEXT: .cfi_offset v30, -32 ; CHECK-NEXT: .cfi_offset v31, -16 -; CHECK-NEXT: xxsldwi vs0, v2, v2, 3 ; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: xxsldwi vs0, v2, v2, 3 ; CHECK-NEXT: stxv v29, 32(r1) # 16-byte Folded Spill ; CHECK-NEXT: xscvspdpn f1, vs0 -; CHECK-NEXT: vextuwrx r4, r3, v3 +; CHECK-NEXT: vextuwrx r3, r3, v3 ; CHECK-NEXT: stxv v30, 48(r1) # 16-byte Folded Spill ; CHECK-NEXT: stxv v31, 64(r1) # 16-byte Folded Spill +; CHECK-NEXT: extsw r4, r3 ; CHECK-NEXT: vmr v31, v3 ; CHECK-NEXT: vmr v30, v2 ; CHECK-NEXT: bl ldexpf ; CHECK-NEXT: nop -; CHECK-NEXT: xxswapd vs0, v30 ; CHECK-NEXT: li r3, 4 +; CHECK-NEXT: xxswapd vs0, v30 ; CHECK-NEXT: xscvdpspn v29, f1 ; CHECK-NEXT: xscvspdpn f1, vs0 -; CHECK-NEXT: vextuwrx r4, r3, v31 +; CHECK-NEXT: vextuwrx r3, r3, v31 +; CHECK-NEXT: extsw r4, r3 ; CHECK-NEXT: bl ldexpf ; CHECK-NEXT: nop ; CHECK-NEXT: xscvdpspn vs0, f1 @@ -100,35 +102,39 @@ define <4 x float> @ldexp_v4f32(<4 x float> %val, <4 x i32> %exp) { ; CHECK-NEXT: .cfi_offset v29, -48 ; CHECK-NEXT: .cfi_offset v30, -32 ; CHECK-NEXT: .cfi_offset v31, -16 -; CHECK-NEXT: li r3, 12 -; CHECK-NEXT: xscvspdpn f1, v2 +; CHECK-NEXT: li r3, 4 +; CHECK-NEXT: xxswapd vs0, v2 ; CHECK-NEXT: stxv v28, 32(r1) # 16-byte Folded Spill +; CHECK-NEXT: xscvspdpn f1, vs0 +; CHECK-NEXT: vextuwrx r3, r3, v3 ; CHECK-NEXT: stxv v29, 48(r1) # 16-byte Folded Spill ; CHECK-NEXT: stxv v30, 64(r1) # 16-byte Folded Spill ; CHECK-NEXT: stxv v31, 80(r1) # 16-byte Folded Spill ; CHECK-NEXT: vmr v31, v3 +; CHECK-NEXT: extsw r4, r3 ; CHECK-NEXT: vmr v30, v2 -; CHECK-NEXT: vextuwrx r4, r3, v3 ; CHECK-NEXT: bl ldexpf ; CHECK-NEXT: nop -; CHECK-NEXT: xxswapd vs0, v30 -; CHECK-NEXT: li r3, 4 +; CHECK-NEXT: li r3, 12 ; CHECK-NEXT: xscpsgndp v29, f1, f1 -; CHECK-NEXT: xscvspdpn f1, vs0 -; CHECK-NEXT: vextuwrx r4, r3, v31 +; CHECK-NEXT: xscvspdpn f1, v30 +; CHECK-NEXT: vextuwrx r3, r3, v31 +; CHECK-NEXT: extsw r4, r3 ; CHECK-NEXT: bl ldexpf ; CHECK-NEXT: nop -; CHECK-NEXT: xxmrghd vs0, v29, vs1 +; CHECK-NEXT: xxmrghd vs0, vs1, v29 ; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: vextuwrx r4, r3, v31 +; CHECK-NEXT: vextuwrx r3, r3, v31 ; CHECK-NEXT: xvcvdpsp v28, vs0 ; CHECK-NEXT: xxsldwi vs0, v30, v30, 3 +; CHECK-NEXT: extsw r4, r3 ; CHECK-NEXT: xscvspdpn f1, vs0 ; CHECK-NEXT: bl ldexpf ; CHECK-NEXT: nop ; CHECK-NEXT: xxsldwi vs0, v30, v30, 1 +; CHECK-NEXT: mfvsrwz r3, v31 ; CHECK-NEXT: xscpsgndp v29, f1, f1 -; CHECK-NEXT: mfvsrwz r4, v31 +; CHECK-NEXT: extsw r4, r3 ; CHECK-NEXT: xscvspdpn f1, vs0 ; CHECK-NEXT: bl ldexpf ; CHECK-NEXT: nop @@ -156,7 +162,7 @@ define half @ldexp_f16(half %arg0, i32 %arg1) { ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset lr, 16 ; CHECK-NEXT: xscvdphp f0, f1 -; CHECK-NEXT: clrldi r4, r4, 32 +; CHECK-NEXT: extsw r4, r4 ; CHECK-NEXT: mffprwz r3, f0 ; CHECK-NEXT: clrlwi r3, r3, 16 ; CHECK-NEXT: mtfprwz f0, r3 diff --git a/llvm/test/CodeGen/PowerPC/negative-integer-fp-libcall.ll b/llvm/test/CodeGen/PowerPC/negative-integer-fp-libcall.ll new file mode 100644 index 000000000000..010ee6ef043e --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/negative-integer-fp-libcall.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -O1 -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s + +; Test that a negative parameter smaller than 64 bits (e.g., int) +; is 
correctly implemented with sign-extension when passed to +; a floating point libcall. + +define double @ldexp_test(ptr %a, ptr %b) nounwind { +; CHECK-LABEL: ldexp_test: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: stdu 1, -112(1) +; CHECK-NEXT: std 0, 128(1) +; CHECK-NEXT: lfd 1, 0(3) +; CHECK-NEXT: lwa 4, 0(4) +; CHECK-NEXT: bl ldexp +; CHECK-NEXT: nop +; CHECK-NEXT: addi 1, 1, 112 +; CHECK-NEXT: ld 0, 16(1) +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: blr + %base = load double, ptr %a + %exp = load i32, ptr %b + %call = call double @llvm.ldexp.f64.i32(double %base, i32 signext %exp) + ret double %call +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll index 65a1035fd815..5d75efe681af 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll @@ -567,16 +567,15 @@ define <8 x i32> @add_constant_rhs_8xi32_partial(<8 x i32> %vin, i32 %a, i32 %b, ; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; CHECK-NEXT: vslideup.vi v8, v10, 5 ; CHECK-NEXT: vmv.s.x v10, a2 -; CHECK-NEXT: lui a0, %hi(.LCPI19_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI19_0) -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vle32.v v12, (a0) ; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; CHECK-NEXT: vslideup.vi v8, v10, 6 -; CHECK-NEXT: vmv.s.x v10, a3 +; CHECK-NEXT: lui a0, %hi(.LCPI19_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI19_0) ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vslideup.vi v8, v10, 7 -; CHECK-NEXT: vadd.vv v8, v8, v12 +; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vmv.s.x v12, a3 +; CHECK-NEXT: vslideup.vi v8, v12, 7 +; CHECK-NEXT: vadd.vv v8, v8, v10 ; CHECK-NEXT: ret %vadd = add <8 x i32> %vin, %e0 = add i32 %a, 23 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll index 14f4f44049c5..24a5bd154c64 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -2501,9 +2501,9 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 ; RV64ZVE32F-NEXT: .LBB35_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma @@ -2546,9 +2546,9 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB35_8 ; RV64ZVE32F-NEXT: j .LBB35_9 @@ -2652,9 +2652,9 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: 
vsetivli zero, 6, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 ; RV64ZVE32F-NEXT: .LBB36_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma @@ -2697,9 +2697,9 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB36_8 ; RV64ZVE32F-NEXT: j .LBB36_9 @@ -2808,9 +2808,9 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 ; RV64ZVE32F-NEXT: .LBB37_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma @@ -2856,9 +2856,9 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB37_8 ; RV64ZVE32F-NEXT: j .LBB37_9 @@ -2966,9 +2966,9 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 ; RV64ZVE32F-NEXT: .LBB38_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma @@ -3011,9 +3011,9 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB38_8 ; RV64ZVE32F-NEXT: j .LBB38_9 @@ -3118,9 +3118,9 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 ; RV64ZVE32F-NEXT: .LBB39_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma @@ -3163,9 
+3163,9 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB39_8 ; RV64ZVE32F-NEXT: j .LBB39_9 @@ -3275,9 +3275,9 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: lw a3, 0(a3) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a3 +; RV64ZVE32F-NEXT: vmv.s.x v12, a3 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 ; RV64ZVE32F-NEXT: .LBB40_9: # %else14 ; RV64ZVE32F-NEXT: andi a3, a2, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma @@ -3323,9 +3323,9 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: lw a3, 0(a3) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v8, a3 +; RV64ZVE32F-NEXT: vmv.s.x v12, a3 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: andi a3, a2, 32 ; RV64ZVE32F-NEXT: bnez a3, .LBB40_8 ; RV64ZVE32F-NEXT: j .LBB40_9 @@ -8200,9 +8200,9 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 ; RV64ZVE32F-NEXT: .LBB74_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma @@ -8245,9 +8245,9 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB74_8 ; RV64ZVE32F-NEXT: j .LBB74_9 @@ -8351,9 +8351,9 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 ; RV64ZVE32F-NEXT: .LBB75_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma @@ -8396,9 +8396,9 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64ZVE32F-NEXT: 
vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB75_8 ; RV64ZVE32F-NEXT: j .LBB75_9 @@ -8507,9 +8507,9 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 ; RV64ZVE32F-NEXT: .LBB76_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma @@ -8555,9 +8555,9 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB76_8 ; RV64ZVE32F-NEXT: j .LBB76_9 @@ -8665,9 +8665,9 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 ; RV64ZVE32F-NEXT: .LBB77_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma @@ -8710,9 +8710,9 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB77_8 ; RV64ZVE32F-NEXT: j .LBB77_9 @@ -8817,9 +8817,9 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 ; RV64ZVE32F-NEXT: .LBB78_9: # %else14 ; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma @@ -8862,9 +8862,9 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: andi 
a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB78_8 ; RV64ZVE32F-NEXT: j .LBB78_9 @@ -8974,9 +8974,9 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: flw fa5, 0(a3) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 ; RV64ZVE32F-NEXT: .LBB79_9: # %else14 ; RV64ZVE32F-NEXT: andi a3, a2, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma @@ -9022,9 +9022,9 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: flw fa5, 0(a3) ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vfmv.s.f v12, fa5 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 +; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: andi a3, a2, 32 ; RV64ZVE32F-NEXT: bnez a3, .LBB79_8 ; RV64ZVE32F-NEXT: j .LBB79_9 diff --git a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll index 368f454fa5fd..d6d57eef47e3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll @@ -234,10 +234,10 @@ define <32 x i16> @v16i16_2(<16 x i16> %a, <16 x i16> %b) { ; CHECK-NEXT: lui a1, %hi(.LCPI15_0) ; CHECK-NEXT: addi a1, a1, %lo(.LCPI15_0) ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vle16.v v16, (a1) -; CHECK-NEXT: vmv2r.v v20, v10 +; CHECK-NEXT: vle16.v v20, (a1) +; CHECK-NEXT: vmv2r.v v16, v10 ; CHECK-NEXT: vmv2r.v v12, v8 -; CHECK-NEXT: vrgather.vv v8, v12, v16 +; CHECK-NEXT: vrgather.vv v8, v12, v20 ; CHECK-NEXT: vid.v v12 ; CHECK-NEXT: vrsub.vi v12, v12, 15 ; CHECK-NEXT: lui a0, 16 @@ -245,7 +245,7 @@ define <32 x i16> @v16i16_2(<16 x i16> %a, <16 x i16> %b) { ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vrgather.vv v8, v20, v12, v0.t +; CHECK-NEXT: vrgather.vv v8, v16, v12, v0.t ; CHECK-NEXT: ret %v32i16 = shufflevector <16 x i16> %a, <16 x i16> %b, <32 x i32> ret <32 x i16> %v32i16 @@ -296,12 +296,12 @@ define <8 x i32> @v4i32_2(<4 x i32> %a, <4 x i32> %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v12, v9 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vid.v v9 -; CHECK-NEXT: vrsub.vi v13, v9, 7 +; CHECK-NEXT: vid.v v13 +; CHECK-NEXT: vrsub.vi v14, v13, 7 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vrgatherei16.vv v10, v8, v13 +; CHECK-NEXT: vrgatherei16.vv v10, v8, v14 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vrsub.vi v8, v9, 3 +; CHECK-NEXT: vrsub.vi v8, v13, 3 ; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vrgatherei16.vv v10, v12, v8, v0.t @@ -330,12 +330,12 @@ define <16 x i32> @v8i32_2(<8 x i32> %a, <8 x i32> %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: vmv2r.v v16, v10 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: vrsub.vi v18, v10, 15 +; CHECK-NEXT: vid.v v18 +; CHECK-NEXT: vrsub.vi v20, v18, 15 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vrgatherei16.vv v12, v8, v18 +; CHECK-NEXT: vrgatherei16.vv v12, v8, v20 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma 
-; CHECK-NEXT: vrsub.vi v8, v10, 7 +; CHECK-NEXT: vrsub.vi v8, v18, 7 ; CHECK-NEXT: li a0, 255 ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu @@ -367,10 +367,10 @@ define <32 x i32> @v16i32_2(<16 x i32> %a, <16 x i32> %b) { ; CHECK-NEXT: lui a1, %hi(.LCPI23_0) ; CHECK-NEXT: addi a1, a1, %lo(.LCPI23_0) ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; CHECK-NEXT: vle16.v v20, (a1) +; CHECK-NEXT: vle16.v v28, (a1) ; CHECK-NEXT: vmv4r.v v24, v12 ; CHECK-NEXT: vmv4r.v v16, v8 -; CHECK-NEXT: vrgatherei16.vv v8, v16, v20 +; CHECK-NEXT: vrgatherei16.vv v8, v16, v28 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vid.v v16 ; CHECK-NEXT: vrsub.vi v16, v16, 15 @@ -430,12 +430,12 @@ define <8 x i64> @v4i64_2(<4 x i64> %a, <4 x i64> %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: vmv2r.v v16, v10 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: vrsub.vi v11, v10, 7 +; CHECK-NEXT: vid.v v18 +; CHECK-NEXT: vrsub.vi v19, v18, 7 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vrgatherei16.vv v12, v8, v11 +; CHECK-NEXT: vrgatherei16.vv v12, v8, v19 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vrsub.vi v8, v10, 3 +; CHECK-NEXT: vrsub.vi v8, v18, 3 ; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vrgatherei16.vv v12, v16, v8, v0.t @@ -605,12 +605,12 @@ define <8 x float> @v4f32_2(<4 x float> %a, <4 x float> %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: vmv1r.v v12, v9 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vid.v v9 -; CHECK-NEXT: vrsub.vi v13, v9, 7 +; CHECK-NEXT: vid.v v13 +; CHECK-NEXT: vrsub.vi v14, v13, 7 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vrgatherei16.vv v10, v8, v13 +; CHECK-NEXT: vrgatherei16.vv v10, v8, v14 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vrsub.vi v8, v9, 3 +; CHECK-NEXT: vrsub.vi v8, v13, 3 ; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vrgatherei16.vv v10, v12, v8, v0.t @@ -639,12 +639,12 @@ define <16 x float> @v8f32_2(<8 x float> %a, <8 x float> %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: vmv2r.v v16, v10 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: vrsub.vi v18, v10, 15 +; CHECK-NEXT: vid.v v18 +; CHECK-NEXT: vrsub.vi v20, v18, 15 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vrgatherei16.vv v12, v8, v18 +; CHECK-NEXT: vrgatherei16.vv v12, v8, v20 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vrsub.vi v8, v10, 7 +; CHECK-NEXT: vrsub.vi v8, v18, 7 ; CHECK-NEXT: li a0, 255 ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu @@ -701,12 +701,12 @@ define <8 x double> @v4f64_2(<4 x double> %a, <4 x double> %b) { ; CHECK: # %bb.0: ; CHECK-NEXT: vmv2r.v v16, v10 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: vrsub.vi v11, v10, 7 +; CHECK-NEXT: vid.v v18 +; CHECK-NEXT: vrsub.vi v19, v18, 7 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vrgatherei16.vv v12, v8, v11 +; CHECK-NEXT: vrgatherei16.vv v12, v8, v19 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vrsub.vi v8, v10, 3 +; CHECK-NEXT: vrsub.vi v8, v18, 3 ; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vrgatherei16.vv v12, v16, v8, v0.t diff --git a/llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll b/llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll index 
67070b989786..227de9ad0ab6 100644 --- a/llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll +++ b/llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll @@ -994,6 +994,30 @@ define i1 @shr_to_rotate_eq_i32_s5(i32 %x) { ret i1 %r } +define i32 @issue108722(i32 %0) { +; CHECK-NOBMI-LABEL: issue108722: +; CHECK-NOBMI: # %bb.0: +; CHECK-NOBMI-NEXT: movl %edi, %ecx +; CHECK-NOBMI-NEXT: roll $24, %ecx +; CHECK-NOBMI-NEXT: xorl %eax, %eax +; CHECK-NOBMI-NEXT: cmpl %edi, %ecx +; CHECK-NOBMI-NEXT: sete %al +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI2-LABEL: issue108722: +; CHECK-BMI2: # %bb.0: +; CHECK-BMI2-NEXT: rorxl $8, %edi, %ecx +; CHECK-BMI2-NEXT: xorl %eax, %eax +; CHECK-BMI2-NEXT: cmpl %edi, %ecx +; CHECK-BMI2-NEXT: sete %al +; CHECK-BMI2-NEXT: retq + %2 = tail call i32 @llvm.fshl.i32(i32 %0, i32 %0, i32 24) + %3 = icmp eq i32 %2, %0 + %4 = zext i1 %3 to i32 + ret i32 %4 +} + + ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; CHECK-AVX: {{.*}} ; CHECK-NOBMI-SSE2: {{.*}} diff --git a/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll b/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll index 5a051a9c499e..332fbf7188af 100644 --- a/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll +++ b/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll @@ -406,13 +406,15 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-SSE-NEXT: subq $72, %rsp ; CHECK-SSE-NEXT: .cfi_def_cfa_offset 80 ; CHECK-SSE-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill -; CHECK-SSE-NEXT: pextrw $7, %xmm0, %edi +; CHECK-SSE-NEXT: pextrw $7, %xmm0, %eax +; CHECK-SSE-NEXT: movswl %ax, %edi ; CHECK-SSE-NEXT: movss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-SSE-NEXT: callq ldexpf@PLT ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-SSE-NEXT: pextrw $6, %xmm0, %edi +; CHECK-SSE-NEXT: pextrw $6, %xmm0, %eax +; CHECK-SSE-NEXT: movswl %ax, %edi ; CHECK-SSE-NEXT: movd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-SSE-NEXT: callq ldexpf@PLT ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT @@ -420,13 +422,15 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-SSE-NEXT: pextrw $5, %xmm0, %edi +; CHECK-SSE-NEXT: pextrw $5, %xmm0, %eax +; CHECK-SSE-NEXT: movswl %ax, %edi ; CHECK-SSE-NEXT: movss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-SSE-NEXT: callq ldexpf@PLT ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-SSE-NEXT: pextrw $4, %xmm0, %edi +; CHECK-SSE-NEXT: pextrw $4, %xmm0, %eax +; CHECK-SSE-NEXT: movswl %ax, %edi ; CHECK-SSE-NEXT: movd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-SSE-NEXT: callq ldexpf@PLT ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT @@ -436,13 +440,15 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-SSE-NEXT: pextrw $3, %xmm0, %edi +; CHECK-SSE-NEXT: pextrw $3, %xmm0, %eax +; CHECK-SSE-NEXT: movswl %ax, %edi ; CHECK-SSE-NEXT: movss 
{{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-SSE-NEXT: callq ldexpf@PLT ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-SSE-NEXT: pextrw $2, %xmm0, %edi +; CHECK-SSE-NEXT: pextrw $2, %xmm0, %eax +; CHECK-SSE-NEXT: movswl %ax, %edi ; CHECK-SSE-NEXT: movd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-SSE-NEXT: callq ldexpf@PLT ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT @@ -450,14 +456,15 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-SSE-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-SSE-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-SSE-NEXT: pextrw $1, %xmm0, %edi +; CHECK-SSE-NEXT: pextrw $1, %xmm0, %eax +; CHECK-SSE-NEXT: movswl %ax, %edi ; CHECK-SSE-NEXT: movss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-SSE-NEXT: callq ldexpf@PLT ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT ; CHECK-SSE-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-SSE-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload ; CHECK-SSE-NEXT: movd %xmm0, %eax -; CHECK-SSE-NEXT: movzwl %ax, %edi +; CHECK-SSE-NEXT: movswl %ax, %edi ; CHECK-SSE-NEXT: movd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-SSE-NEXT: callq ldexpf@PLT ; CHECK-SSE-NEXT: callq __truncsfhf2@PLT @@ -476,13 +483,15 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX2-NEXT: subq $72, %rsp ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 80 ; CHECK-AVX2-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill -; CHECK-AVX2-NEXT: vpextrw $7, %xmm0, %edi +; CHECK-AVX2-NEXT: vpextrw $7, %xmm0, %eax +; CHECK-AVX2-NEXT: movswl %ax, %edi ; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX2-NEXT: callq ldexpf@PLT ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX2-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX2-NEXT: vpextrw $6, %xmm0, %edi +; CHECK-AVX2-NEXT: vpextrw $6, %xmm0, %eax +; CHECK-AVX2-NEXT: movswl %ax, %edi ; CHECK-AVX2-NEXT: vmovd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX2-NEXT: callq ldexpf@PLT ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT @@ -490,13 +499,15 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX2-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX2-NEXT: vpextrw $5, %xmm0, %edi +; CHECK-AVX2-NEXT: vpextrw $5, %xmm0, %eax +; CHECK-AVX2-NEXT: movswl %ax, %edi ; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX2-NEXT: callq ldexpf@PLT ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX2-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX2-NEXT: vpextrw $4, %xmm0, %edi +; CHECK-AVX2-NEXT: vpextrw $4, %xmm0, %eax +; CHECK-AVX2-NEXT: movswl %ax, %edi ; CHECK-AVX2-NEXT: vmovd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX2-NEXT: callq ldexpf@PLT ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT @@ -506,13 +517,15 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 
16-byte Spill ; CHECK-AVX2-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX2-NEXT: vpextrw $3, %xmm0, %edi +; CHECK-AVX2-NEXT: vpextrw $3, %xmm0, %eax +; CHECK-AVX2-NEXT: movswl %ax, %edi ; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX2-NEXT: callq ldexpf@PLT ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX2-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX2-NEXT: vpextrw $2, %xmm0, %edi +; CHECK-AVX2-NEXT: vpextrw $2, %xmm0, %eax +; CHECK-AVX2-NEXT: movswl %ax, %edi ; CHECK-AVX2-NEXT: vmovd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX2-NEXT: callq ldexpf@PLT ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT @@ -520,14 +533,15 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX2-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX2-NEXT: vpextrw $1, %xmm0, %edi +; CHECK-AVX2-NEXT: vpextrw $1, %xmm0, %eax +; CHECK-AVX2-NEXT: movswl %ax, %edi ; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX2-NEXT: callq ldexpf@PLT ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT ; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX2-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload ; CHECK-AVX2-NEXT: vmovd %xmm0, %eax -; CHECK-AVX2-NEXT: movzwl %ax, %edi +; CHECK-AVX2-NEXT: movswl %ax, %edi ; CHECK-AVX2-NEXT: vmovd {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX2-NEXT: callq ldexpf@PLT ; CHECK-AVX2-NEXT: callq __truncsfhf2@PLT @@ -546,7 +560,8 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX512F-NEXT: subq $72, %rsp ; CHECK-AVX512F-NEXT: .cfi_def_cfa_offset 80 ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill -; CHECK-AVX512F-NEXT: vpextrw $7, %xmm0, %edi +; CHECK-AVX512F-NEXT: vpextrw $7, %xmm0, %eax +; CHECK-AVX512F-NEXT: movswl %ax, %edi ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX512F-NEXT: callq ldexpf@PLT ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -554,7 +569,8 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX512F-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX512F-NEXT: vpextrw $6, %xmm0, %edi +; CHECK-AVX512F-NEXT: vpextrw $6, %xmm0, %eax +; CHECK-AVX512F-NEXT: movswl %ax, %edi ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX512F-NEXT: callq ldexpf@PLT ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -564,7 +580,8 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX512F-NEXT: vpextrw $5, %xmm0, %edi +; CHECK-AVX512F-NEXT: vpextrw $5, %xmm0, %eax +; CHECK-AVX512F-NEXT: movswl %ax, %edi ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX512F-NEXT: callq ldexpf@PLT ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -572,7 +589,8 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX512F-NEXT: vpinsrw $0, %eax, %xmm0, 
%xmm0 ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX512F-NEXT: vpextrw $4, %xmm0, %edi +; CHECK-AVX512F-NEXT: vpextrw $4, %xmm0, %eax +; CHECK-AVX512F-NEXT: movswl %ax, %edi ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX512F-NEXT: callq ldexpf@PLT ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -584,7 +602,8 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX512F-NEXT: vpextrw $3, %xmm0, %edi +; CHECK-AVX512F-NEXT: vpextrw $3, %xmm0, %eax +; CHECK-AVX512F-NEXT: movswl %ax, %edi ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX512F-NEXT: callq ldexpf@PLT ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -592,7 +611,8 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX512F-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX512F-NEXT: vpextrw $2, %xmm0, %edi +; CHECK-AVX512F-NEXT: vpextrw $2, %xmm0, %eax +; CHECK-AVX512F-NEXT: movswl %ax, %edi ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX512F-NEXT: callq ldexpf@PLT ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -602,7 +622,8 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload -; CHECK-AVX512F-NEXT: vpextrw $1, %xmm0, %edi +; CHECK-AVX512F-NEXT: vpextrw $1, %xmm0, %eax +; CHECK-AVX512F-NEXT: movswl %ax, %edi ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX512F-NEXT: callq ldexpf@PLT ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 @@ -611,7 +632,7 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) { ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload ; CHECK-AVX512F-NEXT: vmovd %xmm0, %eax -; CHECK-AVX512F-NEXT: movzwl %ax, %edi +; CHECK-AVX512F-NEXT: movswl %ax, %edi ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0] ; CHECK-AVX512F-NEXT: callq ldexpf@PLT ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0 diff --git a/llvm/test/MC/Disassembler/X86/apx/kmov.txt b/llvm/test/MC/Disassembler/X86/apx/kmov.txt index 5d947ff39f23..ba77dda64e59 100644 --- a/llvm/test/MC/Disassembler/X86/apx/kmov.txt +++ b/llvm/test/MC/Disassembler/X86/apx/kmov.txt @@ -17,6 +17,22 @@ # INTEL: {evex} kmovq k2, k1 0x62,0xf1,0xfc,0x08,0x90,0xd1 +# ATT: kmovb -16(%rax), %k0 +# INTEL: kmovb k0, byte ptr [rax - 16] +0x62,0xf1,0x7d,0x08,0x90,0x40,0xf0 + +# ATT: kmovw -16(%rax), %k0 +# INTEL: kmovw k0, word ptr [rax - 16] +0x62,0xf1,0x7c,0x08,0x90,0x40,0xf0 + +# ATT: kmovd -16(%rax), %k0 +# INTEL: kmovd k0, dword ptr [rax - 16] +0x62,0xf1,0xfd,0x08,0x90,0x40,0xf0 + +# ATT: kmovq -16(%rax), %k0 +# INTEL: kmovq k0, qword ptr [rax - 16] +0x62,0xf1,0xfc,0x08,0x90,0x40,0xf0 + # ATT-NOT: {evex} # INTEL-NOT: {evex} diff --git a/llvm/test/MC/X86/apx/kmov-att.s 
b/llvm/test/MC/X86/apx/kmov-att.s index 949ef65be98d..5f59e0a505b2 100644 --- a/llvm/test/MC/X86/apx/kmov-att.s +++ b/llvm/test/MC/X86/apx/kmov-att.s @@ -1,7 +1,7 @@ # RUN: llvm-mc -triple x86_64 -show-encoding %s | FileCheck %s # RUN: not llvm-mc -triple i386 -show-encoding %s 2>&1 | FileCheck %s --check-prefix=ERROR -# ERROR-COUNT-20: error: +# ERROR-COUNT-24: error: # ERROR-NOT: error: # CHECK: {evex} kmovb %k1, %k2 # CHECK: encoding: [0x62,0xf1,0x7d,0x08,0x90,0xd1] @@ -15,6 +15,18 @@ # CHECK: {evex} kmovq %k1, %k2 # CHECK: encoding: [0x62,0xf1,0xfc,0x08,0x90,0xd1] {evex} kmovq %k1, %k2 +# CHECK: {evex} kmovb -16(%rax), %k0 +# CHECK: encoding: [0x62,0xf1,0x7d,0x08,0x90,0x40,0xf0] + {evex} kmovb -0x10(%rax), %k0 +# CHECK: {evex} kmovw -16(%rax), %k0 +# CHECK: encoding: [0x62,0xf1,0x7c,0x08,0x90,0x40,0xf0] + {evex} kmovw -0x10(%rax), %k0 +# CHECK: {evex} kmovd -16(%rax), %k0 +# CHECK: encoding: [0x62,0xf1,0xfd,0x08,0x90,0x40,0xf0] + {evex} kmovd -0x10(%rax), %k0 +# CHECK: {evex} kmovq -16(%rax), %k0 +# CHECK: encoding: [0x62,0xf1,0xfc,0x08,0x90,0x40,0xf0] + {evex} kmovq -0x10(%rax), %k0 # CHECK-NOT: {evex} diff --git a/llvm/test/MC/X86/apx/kmov-intel.s b/llvm/test/MC/X86/apx/kmov-intel.s index 0cdbd310062e..51cec67caf9a 100644 --- a/llvm/test/MC/X86/apx/kmov-intel.s +++ b/llvm/test/MC/X86/apx/kmov-intel.s @@ -12,6 +12,18 @@ # CHECK: {evex} kmovq k2, k1 # CHECK: encoding: [0x62,0xf1,0xfc,0x08,0x90,0xd1] {evex} kmovq k2, k1 +# CHECK: {evex} kmovb k0, byte ptr [rax - 16] +# CHECK: encoding: [0x62,0xf1,0x7d,0x08,0x90,0x40,0xf0] + {evex} kmovb k0, byte ptr [rax - 0x10] +# CHECK: {evex} kmovw k0, word ptr [rax - 16] +# CHECK: encoding: [0x62,0xf1,0x7c,0x08,0x90,0x40,0xf0] + {evex} kmovw k0, word ptr [rax - 0x10] +# CHECK: {evex} kmovd k0, dword ptr [rax - 16] +# CHECK: encoding: [0x62,0xf1,0xfd,0x08,0x90,0x40,0xf0] + {evex} kmovd k0, dword ptr [rax - 0x10] +# CHECK: {evex} kmovq k0, qword ptr [rax - 16] +# CHECK: encoding: [0x62,0xf1,0xfc,0x08,0x90,0x40,0xf0] + {evex} kmovq k0, qword ptr [rax - 0x10] # CHECK-NOT: {evex} diff --git a/llvm/test/Transforms/Inline/access-attributes-prop.ll b/llvm/test/Transforms/Inline/access-attributes-prop.ll index 965f0335b63e..2c55f5f3b1f6 100644 --- a/llvm/test/Transforms/Inline/access-attributes-prop.ll +++ b/llvm/test/Transforms/Inline/access-attributes-prop.ll @@ -559,3 +559,24 @@ define void @prop_byval_readonly(ptr %p) { call void @foo_byval_readonly(ptr %p) ret void } + +define ptr @caller_bad_param_prop(ptr %p1, ptr %p2, i64 %x) { +; CHECK-LABEL: define {{[^@]+}}@caller_bad_param_prop +; CHECK-SAME: (ptr [[P1:%.*]], ptr [[P2:%.*]], i64 [[X:%.*]]) { +; CHECK-NEXT: [[TMP1:%.*]] = call ptr [[P1]](i64 [[X]], ptr [[P2]]) +; CHECK-NEXT: ret ptr [[TMP1]] +; + %1 = call ptr %p1(i64 %x, ptr %p2) + %2 = call ptr @callee_bad_param_prop(ptr %1) + ret ptr %2 +} + +define ptr @callee_bad_param_prop(ptr readonly %x) { +; CHECK-LABEL: define {{[^@]+}}@callee_bad_param_prop +; CHECK-SAME: (ptr readonly [[X:%.*]]) { +; CHECK-NEXT: [[R:%.*]] = tail call ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 -1) +; CHECK-NEXT: ret ptr [[R]] +; + %r = tail call ptr @llvm.ptrmask(ptr %x, i64 -1) + ret ptr %r +} diff --git a/llvm/test/Transforms/Inline/ret_attr_align_and_noundef.ll b/llvm/test/Transforms/Inline/ret_attr_align_and_noundef.ll index f4cebf1fcb5d..930bef43df1d 100644 --- a/llvm/test/Transforms/Inline/ret_attr_align_and_noundef.ll +++ b/llvm/test/Transforms/Inline/ret_attr_align_and_noundef.ll @@ -410,3 +410,54 @@ define i8 @caller15_okay_intersect_ranges() { call void 
   call void @use.val(i8 %r)
   ret i8 %r
 }
+
+define i8 @caller16_not_intersecting_ranges() {
+; CHECK-LABEL: define i8 @caller16_not_intersecting_ranges() {
+; CHECK-NEXT: [[R_I:%.*]] = call range(i8 0, 0) i8 @val8()
+; CHECK-NEXT: call void @use.val(i8 [[R_I]])
+; CHECK-NEXT: ret i8 [[R_I]]
+;
+  %r = call range(i8 0, 5) i8 @callee15()
+  call void @use.val(i8 %r)
+  ret i8 %r
+}
+
+
+define ptr @caller_bad_ret_prop(ptr %p1, ptr %p2, i64 %x, ptr %other) {
+; CHECK-LABEL: define ptr @caller_bad_ret_prop
+; CHECK-SAME: (ptr [[P1:%.*]], ptr [[P2:%.*]], i64 [[X:%.*]], ptr [[OTHER:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call noundef ptr [[P1]](i64 [[X]], ptr [[P2]])
+; CHECK-NEXT: [[CMP_I:%.*]] = icmp eq ptr [[TMP1]], null
+; CHECK-NEXT: br i1 [[CMP_I]], label [[T_I:%.*]], label [[F_I:%.*]]
+; CHECK: T.i:
+; CHECK-NEXT: br label [[CALLEE_BAD_RET_PROP_EXIT:%.*]]
+; CHECK: F.i:
+; CHECK-NEXT: br label [[CALLEE_BAD_RET_PROP_EXIT]]
+; CHECK: callee_bad_ret_prop.exit:
+; CHECK-NEXT: [[TMP2:%.*]] = phi ptr [ [[OTHER]], [[T_I]] ], [ [[TMP1]], [[F_I]] ]
+; CHECK-NEXT: ret ptr [[TMP2]]
+;
+  %1 = call noundef ptr %p1(i64 %x, ptr %p2)
+  %2 = call nonnull ptr @callee_bad_ret_prop(ptr %1, ptr %other)
+  ret ptr %2
+}
+
+define ptr @callee_bad_ret_prop(ptr %x, ptr %other) {
+; CHECK-LABEL: define ptr @callee_bad_ret_prop
+; CHECK-SAME: (ptr [[X:%.*]], ptr [[OTHER:%.*]]) {
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[X]], null
+; CHECK-NEXT: br i1 [[CMP]], label [[T:%.*]], label [[F:%.*]]
+; CHECK: T:
+; CHECK-NEXT: ret ptr [[OTHER]]
+; CHECK: F:
+; CHECK-NEXT: [[R:%.*]] = tail call ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 -1)
+; CHECK-NEXT: ret ptr [[R]]
+;
+  %cmp = icmp eq ptr %x, null
+  br i1 %cmp, label %T, label %F
+T:
+  ret ptr %other
+F:
+  %r = tail call ptr @llvm.ptrmask(ptr %x, i64 -1)
+  ret ptr %r
+}
diff --git a/llvm/test/Transforms/LoopUnroll/pr109333.ll b/llvm/test/Transforms/LoopUnroll/pr109333.ll
new file mode 100644
index 000000000000..f7ac911a7820
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/pr109333.ll
@@ -0,0 +1,104 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes="print<scalar-evolution>,loop-unroll" -unroll-runtime < %s 2>/dev/null | FileCheck %s
+
+; Make sure we use %add.lcssa rather than %load when expanding the
+; backedge taken count.
+
+define void @test(i1 %c, ptr %p) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: i1 [[C:%.*]], ptr [[P:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br label %[[LOOP_1_PEEL_BEGIN:.*]]
+; CHECK: [[LOOP_1_PEEL_BEGIN]]:
+; CHECK-NEXT: br label %[[LOOP_1_PEEL:.*]]
+; CHECK: [[LOOP_1_PEEL]]:
+; CHECK-NEXT: [[LOAD_PEEL:%.*]] = load i64, ptr [[P]], align 8
+; CHECK-NEXT: [[ADD_PEEL:%.*]] = add i64 [[LOAD_PEEL]], 1
+; CHECK-NEXT: br i1 [[C]], label %[[IF:.*]], label %[[LOOP_1_PEEL_NEXT:.*]]
+; CHECK: [[LOOP_1_PEEL_NEXT]]:
+; CHECK-NEXT: br label %[[LOOP_1_PEEL_NEXT1:.*]]
+; CHECK: [[LOOP_1_PEEL_NEXT1]]:
+; CHECK-NEXT: br label %[[ENTRY_PEEL_NEWPH:.*]]
+; CHECK: [[ENTRY_PEEL_NEWPH]]:
+; CHECK-NEXT: br label %[[LOOP_1:.*]]
+; CHECK: [[LOOP_1]]:
+; CHECK-NEXT: [[LOAD:%.*]] = load i64, ptr [[P]], align 8
+; CHECK-NEXT: [[ADD:%.*]] = add i64 [[LOAD]], 1
+; CHECK-NEXT: br i1 [[C]], label %[[IF_LOOPEXIT:.*]], label %[[LOOP_1]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[IF_LOOPEXIT]]:
+; CHECK-NEXT: [[ADD_LCSSA_PH:%.*]] = phi i64 [ [[ADD]], %[[LOOP_1]] ]
+; CHECK-NEXT: br label %[[IF]]
+; CHECK: [[IF]]:
+; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i64 [ [[ADD_PEEL]], %[[LOOP_1_PEEL]] ], [ [[ADD_LCSSA_PH]], %[[IF_LOOPEXIT]] ]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[P]], i64 [[ADD_LCSSA]]
+; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[ADD_LCSSA]], 3
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 3
+; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
+; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP2]], 7
+; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_2_PROL_PREHEADER:.*]], label %[[LOOP_2_PROL_LOOPEXIT:.*]]
+; CHECK: [[LOOP_2_PROL_PREHEADER]]:
+; CHECK-NEXT: br label %[[LOOP_2_PROL:.*]]
+; CHECK: [[LOOP_2_PROL]]:
+; CHECK-NEXT: [[IV_PROL:%.*]] = phi ptr [ [[P]], %[[LOOP_2_PROL_PREHEADER]] ], [ [[IV_NEXT_PROL:%.*]], %[[LOOP_2_PROL]] ]
+; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ 0, %[[LOOP_2_PROL_PREHEADER]] ], [ [[PROL_ITER_NEXT:%.*]], %[[LOOP_2_PROL]] ]
+; CHECK-NEXT: [[IV_NEXT_PROL]] = getelementptr i8, ptr [[IV_PROL]], i64 8
+; CHECK-NEXT: [[ICMP_PROL:%.*]] = icmp eq ptr [[IV_PROL]], [[GEP]]
+; CHECK-NEXT: [[PROL_ITER_NEXT]] = add i64 [[PROL_ITER]], 1
+; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_NEXT]], [[XTRAITER]]
+; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label %[[LOOP_2_PROL]], label %[[LOOP_2_PROL_LOOPEXIT_UNR_LCSSA:.*]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK: [[LOOP_2_PROL_LOOPEXIT_UNR_LCSSA]]:
+; CHECK-NEXT: [[IV_UNR_PH:%.*]] = phi ptr [ [[IV_NEXT_PROL]], %[[LOOP_2_PROL]] ]
+; CHECK-NEXT: br label %[[LOOP_2_PROL_LOOPEXIT]]
+; CHECK: [[LOOP_2_PROL_LOOPEXIT]]:
+; CHECK-NEXT: [[IV_UNR:%.*]] = phi ptr [ [[P]], %[[IF]] ], [ [[IV_UNR_PH]], %[[LOOP_2_PROL_LOOPEXIT_UNR_LCSSA]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP1]], 7
+; CHECK-NEXT: br i1 [[TMP3]], label %[[EXIT:.*]], label %[[IF_NEW:.*]]
+; CHECK: [[IF_NEW]]:
+; CHECK-NEXT: br label %[[LOOP_2:.*]]
+; CHECK: [[LOOP_2]]:
+; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[IV_UNR]], %[[IF_NEW]] ], [ [[IV_NEXT_7:%.*]], %[[LOOP_2]] ]
+; CHECK-NEXT: [[IV_NEXT:%.*]] = getelementptr i8, ptr [[IV]], i64 8
+; CHECK-NEXT: [[IV_NEXT_1:%.*]] = getelementptr i8, ptr [[IV_NEXT]], i64 8
+; CHECK-NEXT: [[IV_NEXT_2:%.*]] = getelementptr i8, ptr [[IV_NEXT_1]], i64 8
+; CHECK-NEXT: [[IV_NEXT_3:%.*]] = getelementptr i8, ptr [[IV_NEXT_2]], i64 8
+; CHECK-NEXT: [[IV_NEXT_4:%.*]] = getelementptr i8, ptr [[IV_NEXT_3]], i64 8
+; CHECK-NEXT: [[IV_NEXT_5:%.*]] = getelementptr i8, ptr [[IV_NEXT_4]], i64 8
+; CHECK-NEXT: [[IV_NEXT_6:%.*]] = getelementptr i8, ptr [[IV_NEXT_5]], i64 8
+; CHECK-NEXT: [[IV_NEXT_7]] = getelementptr i8, ptr [[IV_NEXT_6]], i64 8
+; CHECK-NEXT: [[ICMP_7:%.*]] = icmp eq ptr [[IV_NEXT_6]], [[GEP]]
+; CHECK-NEXT: br i1 [[ICMP_7]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP_2]]
+; CHECK: [[EXIT_UNR_LCSSA]]:
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+  br label %loop.1
+
+loop.1:
+  %phi = phi ptr [ null, %entry ], [ %p, %loop.1 ]
+  %load = load i64, ptr %p, align 8
+  %add = add i64 %load, 1
+  br i1 %c, label %if, label %loop.1
+
+if:
+  %add.lcssa = phi i64 [ %add, %loop.1 ]
+  %gep = getelementptr i64, ptr %p, i64 %add.lcssa
+  br label %loop.2
+
+loop.2:
+  %iv = phi ptr [ %p, %if ], [ %iv.next, %loop.2 ]
+  %iv.next = getelementptr i8, ptr %iv, i64 8
+  %icmp = icmp eq ptr %iv, %gep
+  br i1 %icmp, label %exit, label %loop.2
+
+exit:
+  ret void
+}
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.peeled.count", i32 1}
+; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]]}
+; CHECK: [[META3]] = !{!"llvm.loop.unroll.disable"}
+;.
diff --git a/llvm/test/tools/llvm-ml/rip_relative_addressing.asm b/llvm/test/tools/llvm-ml/rip_relative_addressing.asm
index d237e84435b7..c005b9721c07 100644
--- a/llvm/test/tools/llvm-ml/rip_relative_addressing.asm
+++ b/llvm/test/tools/llvm-ml/rip_relative_addressing.asm
@@ -53,4 +53,14 @@ mov eax, [t8]
 ; CHECK-LABEL: t8:
 ; CHECK: mov eax, dword ptr [t8]
 
-END
\ No newline at end of file
+t9:
+mov eax, dword ptr [bar]
+; CHECK-LABEL: t9:
+; CHECK-32: mov eax, dword ptr [bar]
+; CHECK-64: mov eax, dword ptr [rip + bar]
+
+t10:
+mov ebx, dword ptr [4*eax]
+; CHECK: mov ebx, dword ptr [4*eax]
+
+END
diff --git a/llvm/utils/gn/secondary/llvm/version.gni b/llvm/utils/gn/secondary/llvm/version.gni
index 3f44a4645acf..5e0a56f80bff 100644
--- a/llvm/utils/gn/secondary/llvm/version.gni
+++ b/llvm/utils/gn/secondary/llvm/version.gni
@@ -1,4 +1,4 @@
 llvm_version_major = 19
 llvm_version_minor = 1
-llvm_version_patch = 0
+llvm_version_patch = 1
 llvm_version = "$llvm_version_major.$llvm_version_minor.$llvm_version_patch"
diff --git a/llvm/utils/lit/lit/__init__.py b/llvm/utils/lit/lit/__init__.py
index 03edfc336097..b0cac053a938 100644
--- a/llvm/utils/lit/lit/__init__.py
+++ b/llvm/utils/lit/lit/__init__.py
@@ -2,7 +2,7 @@
 __author__ = "Daniel Dunbar"
 __email__ = "daniel@minormatter.com"
 
-__versioninfo__ = (19, 1, 0)
+__versioninfo__ = (19, 1, 1)
 __version__ = ".".join(str(v) for v in __versioninfo__) + "dev"
 
 __all__ = []
diff --git a/llvm/utils/mlgo-utils/mlgo/__init__.py b/llvm/utils/mlgo-utils/mlgo/__init__.py
index 09e95f3d6205..26e024153942 100644
--- a/llvm/utils/mlgo-utils/mlgo/__init__.py
+++ b/llvm/utils/mlgo-utils/mlgo/__init__.py
@@ -4,7 +4,7 @@
 
 from datetime import timezone, datetime
 
-__versioninfo__ = (19, 1, 0)
+__versioninfo__ = (19, 1, 1)
 __version__ = (
     ".".join(str(v) for v in __versioninfo__)
     + "dev"